]>
Commit | Line | Data |
---|---|---|
44037a66 TG |
1 | /* Medium-level subroutines: convert bit-field store and extract |
2 | and shifts, multiplies and divides to rtl instructions. | |
99dee823 | 3 | Copyright (C) 1987-2021 Free Software Foundation, Inc. |
44037a66 | 4 | |
1322177d | 5 | This file is part of GCC. |
44037a66 | 6 | |
1322177d LB |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 9 | Software Foundation; either version 3, or (at your option) any later |
1322177d | 10 | version. |
44037a66 | 11 | |
1322177d LB |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
44037a66 TG |
16 | |
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
44037a66 | 20 | |
b63566a4 JM |
21 | /* Work around tree-optimization/91825. */ |
22 | #pragma GCC diagnostic warning "-Wmaybe-uninitialized" | |
44037a66 TG |
23 | |
24 | #include "config.h" | |
670ee920 | 25 | #include "system.h" |
4977bab6 | 26 | #include "coretypes.h" |
c7131fb2 | 27 | #include "backend.h" |
957060b5 | 28 | #include "target.h" |
44037a66 | 29 | #include "rtl.h" |
957060b5 AM |
30 | #include "tree.h" |
31 | #include "predict.h" | |
4d0cdd0c | 32 | #include "memmodel.h" |
957060b5 | 33 | #include "tm_p.h" |
957060b5 | 34 | #include "optabs.h" |
e34153b0 | 35 | #include "expmed.h" |
a93072ca | 36 | #include "regs.h" |
957060b5 | 37 | #include "emit-rtl.h" |
c7131fb2 | 38 | #include "diagnostic-core.h" |
40e23961 | 39 | #include "fold-const.h" |
d8a2d370 | 40 | #include "stor-layout.h" |
36566b39 PK |
41 | #include "dojump.h" |
42 | #include "explow.h" | |
44037a66 | 43 | #include "expr.h" |
b0c48229 | 44 | #include "langhooks.h" |
5ebaa477 | 45 | #include "tree-vector-builder.h" |
462f85ce RS |
46 | |
47 | struct target_expmed default_target_expmed; | |
48 | #if SWITCHABLE_TARGET | |
49 | struct target_expmed *this_target_expmed = &default_target_expmed; | |
50 | #endif | |
44037a66 | 51 | |
2d7b38df RS |
52 | static bool store_integral_bit_field (rtx, opt_scalar_int_mode, |
53 | unsigned HOST_WIDE_INT, | |
54 | unsigned HOST_WIDE_INT, | |
8c59e5e7 | 55 | poly_uint64, poly_uint64, |
2d7b38df | 56 | machine_mode, rtx, bool, bool); |
1a527092 | 57 | static void store_fixed_bit_field (rtx, opt_scalar_int_mode, |
502b8322 | 58 | unsigned HOST_WIDE_INT, |
1169e45d | 59 | unsigned HOST_WIDE_INT, |
8c59e5e7 | 60 | poly_uint64, poly_uint64, |
1a527092 RS |
61 | rtx, scalar_int_mode, bool); |
62 | static void store_fixed_bit_field_1 (rtx, scalar_int_mode, | |
63 | unsigned HOST_WIDE_INT, | |
ebb99f96 | 64 | unsigned HOST_WIDE_INT, |
1a527092 RS |
65 | rtx, scalar_int_mode, bool); |
66 | static void store_split_bit_field (rtx, opt_scalar_int_mode, | |
1169e45d AH |
67 | unsigned HOST_WIDE_INT, |
68 | unsigned HOST_WIDE_INT, | |
8c59e5e7 | 69 | poly_uint64, poly_uint64, |
1a527092 | 70 | rtx, scalar_int_mode, bool); |
fc60a416 RS |
71 | static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode, |
72 | unsigned HOST_WIDE_INT, | |
73 | unsigned HOST_WIDE_INT, int, rtx, | |
508fa61b | 74 | machine_mode, machine_mode, bool, bool); |
1a527092 | 75 | static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode, |
502b8322 | 76 | unsigned HOST_WIDE_INT, |
ee45a32d | 77 | unsigned HOST_WIDE_INT, rtx, int, bool); |
1a527092 | 78 | static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode, |
6f4e9cf8 | 79 | unsigned HOST_WIDE_INT, |
ee45a32d | 80 | unsigned HOST_WIDE_INT, rtx, int, bool); |
ef4bddc2 | 81 | static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int); |
1a527092 RS |
82 | static rtx extract_split_bit_field (rtx, opt_scalar_int_mode, |
83 | unsigned HOST_WIDE_INT, | |
ee45a32d | 84 | unsigned HOST_WIDE_INT, int, bool); |
ef4bddc2 | 85 | static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *); |
095a2d76 RS |
86 | static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT); |
87 | static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT); | |
44037a66 | 88 | |
/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
{
  /* wi::shifted_mask builds the bit pattern; immed_wide_int_const turns
     it into a CONST_INT/CONST_WIDE_INT of the requested mode.  */
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
		       GET_MODE_PRECISION (mode)), mode);
}
101 | ||
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
58b42e19 | 105 | |
/* Scratch RTL expressions used by init_expmed and its subroutines to
   query the cost of common operations.  The same RTXes are reused for
   every machine mode by changing their mode with PUT_MODE, avoiding a
   fresh allocation per mode/operation pair.  */
struct init_expmed_rtl
{
  rtx reg;		/* Pseudo-register operand shared by the others.  */
  rtx plus;		/* (plus reg reg)  */
  rtx neg;		/* (neg reg)  */
  rtx mult;		/* (mult reg reg)  */
  rtx sdiv;		/* (div reg reg)  */
  rtx udiv;		/* (udiv reg reg)  */
  rtx sdiv_32;		/* (div reg 32) -- signed division by 2^5.  */
  rtx smod_32;		/* (mod reg 32) -- signed modulus by 2^5.  */
  rtx wide_mult;	/* Widening multiply of two zero-extensions.  */
  rtx wide_lshr;	/* Logical shift right of the widened product.  */
  rtx wide_trunc;	/* Truncation of the shifted product (highpart).  */
  rtx shift;		/* (ashift reg cint[m])  */
  rtx shift_mult;	/* (mult reg pow2[m]) -- shift expressed as multiply.  */
  rtx shift_add;	/* (plus shift_mult reg)  */
  rtx shift_sub0;	/* (minus shift_mult reg)  */
  rtx shift_sub1;	/* (minus reg shift_mult)  */
  rtx zext;		/* Zero extension of reg.  */
  rtx trunc;		/* Truncation of reg.  */

  rtx pow2[MAX_BITS_PER_WORD];	/* pow2[m] is the constant 1 << m.  */
  rtx cint[MAX_BITS_PER_WORD];	/* cint[m] is the constant m.  */
};
130 | ||
/* Record the cost of converting from FROM_MODE to TO_MODE (for both the
   speed and size metric selected by SPEED), using the scratch RTL in
   *ALL.  The cost of a widening conversion is probed with ALL->zext and
   the cost of a narrowing one with ALL->trunc.  */

static void
init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
		      scalar_int_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than the "full"
     integer it requires for storage.  In case one doesn't, for
     comparison purposes here, reduce the bit size by one in that
     case.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (to_size))
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (from_size))
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
		    set_src_cost (which, to_mode, speed));
  /* Restore all->reg's mode.  */
  PUT_MODE (all->reg, to_mode);
}
161 | ||
/* Fill in the cost tables for machine mode MODE under the speed/size
   metric SPEED, using the scratch RTL in *ALL: addition, negation,
   multiplication, division, shifts and shift-combined operations, and
   (for scalar integer modes) conversion and highpart-multiply costs.  */

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  /* Retarget every scratch expression to MODE before costing it.  */
  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  /* Division/modulus by a power of two is considered "cheap" when it
     costs no more than a few additions.  */
  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
				     <= 4 * add_cost (speed, mode)));

  /* A shift by zero is free; shift-and-add by zero costs one add.  */
  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  /* Probe each shift amount up to the word size (or the mode size,
     whichever is smaller).  */
  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
						       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
							speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
							speed));
    }

  scalar_int_mode int_mode_to;
  if (is_a <scalar_int_mode> (mode, &int_mode_to))
    {
      /* Record conversion costs from every integer mode into MODE.  */
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, int_mode_to,
			      as_a <scalar_int_mode> (mode_from), speed);

      scalar_int_mode wider_mode;
      if (GET_MODE_CLASS (int_mode_to) == MODE_INT
	  && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
	{
	  /* Cost the widening multiply and the highpart extraction
	     (widen, multiply, shift right by the mode size, truncate).  */
	  PUT_MODE (all->reg, mode);
	  PUT_MODE (all->zext, wider_mode);
	  PUT_MODE (all->wide_mult, wider_mode);
	  PUT_MODE (all->wide_lshr, wider_mode);
	  XEXP (all->wide_lshr, 1)
	    = gen_int_shift_amount (wider_mode, mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (all->wide_mult, wider_mode, speed));
	  set_mul_highpart_cost (speed, int_mode_to,
				 set_src_cost (all->wide_trunc,
					       int_mode_to, speed));
	}
    }
}
249 | ||
/* One-time initialization of the expmed cost tables: build the shared
   scratch RTL, cost every integer, partial-integer and vector-integer
   mode for both the size (speed==0) and speed (speed==1) metrics, reset
   the multiplication-algorithm hash, and free the scratch RTL again.  */

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      /* Costs depend on whether the insn is considered "hot"; select
	 the profile matching the metric being recorded.  */
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  /* Clear the algorithm hash on reinitialization; mark it used on the
     first call.  */
  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  /* Free the scratch RTL in reverse order of creation; all.reg last
     because the other expressions reference it.  */
  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}
332 | ||
333 | /* Return an rtx representing minus the value of X. | |
334 | MODE is the intended mode of the result, | |
335 | useful if X is a CONST_INT. */ | |
336 | ||
337 | rtx | |
ef4bddc2 | 338 | negate_rtx (machine_mode mode, rtx x) |
44037a66 | 339 | { |
a39a7484 RK |
340 | rtx result = simplify_unary_operation (NEG, mode, x, mode); |
341 | ||
fdb5537f | 342 | if (result == 0) |
a39a7484 RK |
343 | result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |
344 | ||
345 | return result; | |
44037a66 | 346 | } |
da920570 | 347 | |
ee45a32d EB |
348 | /* Whether reverse storage order is supported on the target. */ |
349 | static int reverse_storage_order_supported = -1; | |
350 | ||
351 | /* Check whether reverse storage order is supported on the target. */ | |
352 | ||
353 | static void | |
354 | check_reverse_storage_order_support (void) | |
355 | { | |
356 | if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
357 | { | |
358 | reverse_storage_order_supported = 0; | |
359 | sorry ("reverse scalar storage order"); | |
360 | } | |
361 | else | |
362 | reverse_storage_order_supported = 1; | |
363 | } | |
364 | ||
365 | /* Whether reverse FP storage order is supported on the target. */ | |
366 | static int reverse_float_storage_order_supported = -1; | |
367 | ||
368 | /* Check whether reverse FP storage order is supported on the target. */ | |
369 | ||
370 | static void | |
371 | check_reverse_float_storage_order_support (void) | |
372 | { | |
373 | if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
374 | { | |
375 | reverse_float_storage_order_supported = 0; | |
376 | sorry ("reverse floating-point scalar storage order"); | |
377 | } | |
378 | else | |
379 | reverse_float_storage_order_supported = 1; | |
380 | } | |
381 | ||
/* Return an rtx representing value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (machine_mode mode, rtx x)
{
  scalar_int_mode int_mode;
  rtx result;

  /* A single byte has no storage order to flip.  */
  if (mode == QImode)
    return x;

  /* Flip each half of a complex value independently.  */
  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  /* Lazily probe target support on the first call.  */
  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (!is_a <scalar_int_mode> (mode, &int_mode))
    {
      if (FLOAT_MODE_P (mode)
	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
	check_reverse_float_storage_order_support ();

      /* Perform the byte swap in an integer mode of the same precision,
	 if the target has one.  */
      if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
	  || !targetm.scalar_mode_supported_p (int_mode))
	{
	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
	  return x;
	}
      x = gen_lowpart (int_mode, x);
    }

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  /* Convert back to the original mode if we had to go via an integer
     mode.  */
  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}
433 | ||
5602f58c RS |
/* If MODE is set, adjust bitfield memory MEM so that it points to the
   first unit of mode MODE that contains a bitfield of size BITSIZE at
   bit position BITNUM.  If MODE is not set, return a BLKmode reference
   to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
   of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
		      unsigned HOST_WIDE_INT bitsize,
		      unsigned HOST_WIDE_INT bitnum,
		      unsigned HOST_WIDE_INT *new_bitnum)
{
  scalar_int_mode imode;
  if (mode.exists (&imode))
    {
      /* Step the address forward in whole IMODE units.  */
      unsigned int unit = GET_MODE_BITSIZE (imode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, imode, offset);
    }
  else
    {
      /* No mode given: cover exactly the bytes that the field touches
	 with a BLKmode reference.  */
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
			    / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, BLKmode, offset, size);
    }
}
463 | ||
fcdd52b7 RS |
/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
			      rtx op0, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bitnum,
			      poly_uint64 bitregion_start,
			      poly_uint64 bitregion_end,
			      machine_mode fieldmode,
			      unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
				bitregion_end, MEM_ALIGN (op0),
				MEM_VOLATILE_P (op0));
  scalar_int_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
	 any wider modes.  All other things being equal, we prefer to
	 use the widest mode possible because it tends to expose more
	 CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
	{
	  /* Limit the search to the mode required by the corresponding
	     register insertion or extraction instruction, if any.  */
	  scalar_int_mode limit_mode = word_mode;
	  extraction_insn insn;
	  if (get_best_reg_extraction_insn (&insn, pattern,
					    GET_MODE_BITSIZE (best_mode),
					    fieldmode))
	    limit_mode = insn.field_mode;

	  scalar_int_mode wider_mode;
	  while (iter.next_mode (&wider_mode)
		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
	    best_mode = wider_mode;
	}
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
				   new_bitnum);
    }
  return NULL_RTX;
}
514 | ||
bebf0797 RS |
/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
		     machine_mode struct_mode)
{
  poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
  if (BYTES_BIG_ENDIAN)
    /* On big-endian targets the lowpart is at the high end: the field
       must be byte-aligned and end either at the end of the structure
       or on a natural register boundary.  */
    return (multiple_p (bitnum, BITS_PER_UNIT)
	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
		|| multiple_p (bitnum + bitsize,
			       regsize * BITS_PER_UNIT)));
  else
    /* On little-endian targets the field just has to start on a natural
       register boundary.  */
    return multiple_p (bitnum, regsize * BITS_PER_UNIT);
}
00efe3ea | 532 | |
/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    scalar_int_mode fieldmode,
			    poly_uint64 bitregion_start,
			    poly_uint64 bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees, that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (maybe_ne (bitregion_end, 0U)
      && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
		       bitregion_end)))
    return false;

  return true;
}
579 | ||
/* Return true if OP is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.
   Store the byte offset in *BYTENUM if so.  */

static bool
simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
		       machine_mode mode, poly_uint64 *bytenum)
{
  /* The field must start on a byte boundary, span exactly MODE, and
     either be fast to access unaligned or be sufficiently aligned.  */
  return (MEM_P (op0)
	  && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
	      || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}
6d7db3c5 | 595 | \f |
/* Try to use instruction INSV to store VALUE into a field of OP0.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
   are as for store_bit_field.  Return true on success; on failure any
   partially emitted insns are deleted and OP0 is left untouched.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    opt_scalar_int_mode op0_mode,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value, scalar_int_mode value_mode)
{
  class expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  /* Remember the insn stream position so we can undo on failure.  */
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  scalar_int_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
				 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	{
	  /* If such a SUBREG can't be created, give up.  */
	  if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
				SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
	    return false;
	  /* We can't just change the mode, because this might clobber op0,
	     and we will need the original value of op0 if insv fails.  */
	  xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
				 SUBREG_BYTE (xop0));
	}
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					 op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only handles the case where the field lies completely
     outside the register.  The field can also lie partially in the
     register, in which case we must adjust BITSIZE for the partial
     overflow.  Without this fix, pr48335-2.c would be broken on
     big-endian targets that support bit-insert instructions, such as
     arm and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
	       "destination object, data truncated into %wu-bit",
	       bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (value_mode != op_mode)
    {
      if (GET_MODE_BITSIZE (value_mode) >= bitsize)
	{
	  rtx tmp;
	  /* Optimization: Don't bother really extending VALUE
	     if it has all the bits we will actually use.  However,
	     if we must narrow it, be sure we do it correctly.  */

	  if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
	    {
	      tmp = simplify_subreg (op_mode, value1, value_mode, 0);
	      if (! tmp)
		tmp = simplify_gen_subreg (op_mode,
					   force_reg (value_mode, value1),
					   value_mode, 0);
	    }
	  else
	    {
	      tmp = gen_lowpart_if_possible (op_mode, value1);
	      if (! tmp)
		tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
	    }
	  value1 = tmp;
	}
      else if (CONST_INT_P (value))
	value1 = gen_int_mode (INTVAL (value), op_mode);
      else
	/* Parse phase is supposed to make VALUE's data type
	   match that of the component reference, which is a type
	   at least as wide as the field; so VALUE should have
	   a mode that corresponds to that type.  */
	gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
	convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}
735 | ||
6d7db3c5 RS |
736 | /* A subroutine of store_bit_field, with the same arguments. Return true |
737 | if the operation could be implemented. | |
44037a66 | 738 | |
6d7db3c5 RS |
739 | If FALLBACK_P is true, fall back to store_fixed_bit_field if we have |
740 | no other way of implementing the operation. If FALLBACK_P is false, | |
741 | return false instead. */ | |
742 | ||
743 | static bool | |
2d7b38df | 744 | store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum, |
8c59e5e7 | 745 | poly_uint64 bitregion_start, poly_uint64 bitregion_end, |
ef4bddc2 | 746 | machine_mode fieldmode, |
ee45a32d | 747 | rtx value, bool reverse, bool fallback_p) |
44037a66 | 748 | { |
b3694847 | 749 | rtx op0 = str_rtx; |
da920570 | 750 | |
44037a66 TG |
751 | while (GET_CODE (op0) == SUBREG) |
752 | { | |
3d09ba95 | 753 | bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT; |
44037a66 TG |
754 | op0 = SUBREG_REG (op0); |
755 | } | |
756 | ||
2c58f7dd RS |
757 | /* No action is needed if the target is a register and if the field |
758 | lies completely outside that register. This can occur if the source | |
759 | code contains an out-of-bounds access to a small array. */ | |
2d7b38df | 760 | if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0)))) |
6d7db3c5 | 761 | return true; |
2c58f7dd | 762 | |
b42271d6 | 763 | /* Use vec_set patterns for inserting parts of vectors whenever |
997404de | 764 | available. */ |
d21cefc2 RS |
765 | machine_mode outermode = GET_MODE (op0); |
766 | scalar_mode innermode = GET_MODE_INNER (outermode); | |
2d7b38df | 767 | poly_uint64 pos; |
d21cefc2 | 768 | if (VECTOR_MODE_P (outermode) |
3c0cb5de | 769 | && !MEM_P (op0) |
d21cefc2 RS |
770 | && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing |
771 | && fieldmode == innermode | |
2d7b38df RS |
772 | && known_eq (bitsize, GET_MODE_BITSIZE (innermode)) |
773 | && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos)) | |
997404de | 774 | { |
99b1c316 | 775 | class expand_operand ops[3]; |
a5c7d693 | 776 | enum insn_code icode = optab_handler (vec_set_optab, outermode); |
997404de | 777 | |
a5c7d693 RS |
778 | create_fixed_operand (&ops[0], op0); |
779 | create_input_operand (&ops[1], value, innermode); | |
780 | create_integer_operand (&ops[2], pos); | |
781 | if (maybe_expand_insn (icode, 3, ops)) | |
782 | return true; | |
997404de JH |
783 | } |
784 | ||
308ecea0 | 785 | /* If the target is a register, overwriting the entire object, or storing |
bebf0797 RS |
786 | a full-word or multi-word field can be done with just a SUBREG. */ |
787 | if (!MEM_P (op0) | |
2d7b38df | 788 | && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))) |
bebf0797 RS |
789 | { |
790 | /* Use the subreg machinery either to narrow OP0 to the required | |
d8c84975 JJ |
791 | words or to cope with mode punning between equal-sized modes. |
792 | In the latter case, use subreg on the rhs side, not lhs. */ | |
793 | rtx sub; | |
2d7b38df | 794 | HOST_WIDE_INT regnum; |
fad2288b | 795 | poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0)); |
2d7b38df RS |
796 | if (known_eq (bitnum, 0U) |
797 | && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0)))) | |
d8c84975 JJ |
798 | { |
799 | sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0); | |
800 | if (sub) | |
801 | { | |
ee45a32d EB |
802 | if (reverse) |
803 | sub = flip_storage_order (GET_MODE (op0), sub); | |
d8c84975 JJ |
804 | emit_move_insn (op0, sub); |
805 | return true; | |
806 | } | |
807 | } | |
2d7b38df | 808 | else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, ®num) |
147ed018 RB |
809 | && multiple_p (bitsize, regsize * BITS_PER_UNIT) |
810 | && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize)) | |
bebf0797 | 811 | { |
d8c84975 | 812 | sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), |
2d7b38df | 813 | regnum * regsize); |
d8c84975 JJ |
814 | if (sub) |
815 | { | |
ee45a32d EB |
816 | if (reverse) |
817 | value = flip_storage_order (fieldmode, value); | |
d8c84975 JJ |
818 | emit_move_insn (sub, value); |
819 | return true; | |
820 | } | |
bebf0797 RS |
821 | } |
822 | } | |
308ecea0 | 823 | |
bebf0797 | 824 | /* If the target is memory, storing any naturally aligned field can be |
308ecea0 | 825 | done with a simple store. For targets that support fast unaligned |
0b69c29f | 826 | memory, any naturally sized, unit aligned field can be done directly. */ |
2d7b38df RS |
827 | poly_uint64 bytenum; |
828 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum)) | |
44037a66 | 829 | { |
2d7b38df | 830 | op0 = adjust_bitfield_address (op0, fieldmode, bytenum); |
ee45a32d EB |
831 | if (reverse) |
832 | value = flip_storage_order (fieldmode, value); | |
44037a66 | 833 | emit_move_insn (op0, value); |
6d7db3c5 | 834 | return true; |
44037a66 TG |
835 | } |
836 | ||
2d7b38df RS |
837 | /* It's possible we'll need to handle other cases here for |
838 | polynomial bitnum and bitsize. */ | |
839 | ||
840 | /* From here on we need to be looking at a fixed-size insertion. */ | |
841 | unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant (); | |
842 | unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant (); | |
843 | ||
a8ca7756 JW |
844 | /* Make sure we are playing with integral modes. Pun with subregs |
845 | if we aren't. This must come after the entire register case above, | |
846 | since that case is valid for any mode. The following cases are only | |
847 | valid for integral modes. */ | |
1a527092 | 848 | opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0)); |
304b9962 | 849 | scalar_int_mode imode; |
1a527092 | 850 | if (!op0_mode.exists (&imode) || imode != GET_MODE (op0)) |
304b9962 RS |
851 | { |
852 | if (MEM_P (op0)) | |
1a527092 | 853 | op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (), |
304b9962 | 854 | 0, MEM_SIZE (op0)); |
d742b0c1 JJ |
855 | else if (!op0_mode.exists ()) |
856 | { | |
857 | if (ibitnum == 0 | |
858 | && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0))) | |
859 | && MEM_P (value) | |
860 | && !reverse) | |
861 | { | |
862 | value = adjust_address (value, GET_MODE (op0), 0); | |
863 | emit_move_insn (op0, value); | |
864 | return true; | |
865 | } | |
866 | if (!fallback_p) | |
867 | return false; | |
868 | rtx temp = assign_stack_temp (GET_MODE (op0), | |
869 | GET_MODE_SIZE (GET_MODE (op0))); | |
870 | emit_move_insn (temp, op0); | |
871 | store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value, | |
872 | reverse, fallback_p); | |
873 | emit_move_insn (op0, temp); | |
874 | return true; | |
875 | } | |
304b9962 RS |
876 | else |
877 | op0 = gen_lowpart (op0_mode.require (), op0); | |
878 | } | |
a8ca7756 | 879 | |
2d7b38df RS |
880 | return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum, |
881 | bitregion_start, bitregion_end, | |
882 | fieldmode, value, reverse, fallback_p); | |
883 | } | |
884 | ||
885 | /* Subroutine of store_bit_field_1, with the same arguments, except | |
886 | that BITSIZE and BITNUM are constant. Handle cases specific to | |
887 | integral modes. If OP0_MODE is defined, it is the mode of OP0, | |
888 | otherwise OP0 is a BLKmode MEM. */ | |
889 | ||
890 | static bool | |
891 | store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode, | |
892 | unsigned HOST_WIDE_INT bitsize, | |
893 | unsigned HOST_WIDE_INT bitnum, | |
8c59e5e7 RS |
894 | poly_uint64 bitregion_start, |
895 | poly_uint64 bitregion_end, | |
2d7b38df RS |
896 | machine_mode fieldmode, |
897 | rtx value, bool reverse, bool fallback_p) | |
898 | { | |
44037a66 | 899 | /* Storing an lsb-aligned field in a register |
bebf0797 | 900 | can be done with a movstrict instruction. */ |
44037a66 | 901 | |
3c0cb5de | 902 | if (!MEM_P (op0) |
ee45a32d | 903 | && !reverse |
2d7b38df | 904 | && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ()) |
73a699ae | 905 | && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)) |
947131ba | 906 | && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing) |
44037a66 | 907 | { |
99b1c316 | 908 | class expand_operand ops[2]; |
a5c7d693 | 909 | enum insn_code icode = optab_handler (movstrict_optab, fieldmode); |
5d560619 | 910 | rtx arg0 = op0; |
19228b93 | 911 | unsigned HOST_WIDE_INT subreg_off; |
5e4900c7 | 912 | |
a5c7d693 | 913 | if (GET_CODE (arg0) == SUBREG) |
44037a66 | 914 | { |
5b0264cb NS |
915 | /* Else we've got some float mode source being extracted into |
916 | a different float mode destination -- this combination of | |
917 | subregs results in Severe Tire Damage. */ | |
a5c7d693 | 918 | gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode |
5b0264cb NS |
919 | || GET_MODE_CLASS (fieldmode) == MODE_INT |
920 | || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); | |
a5c7d693 | 921 | arg0 = SUBREG_REG (arg0); |
5e4900c7 | 922 | } |
470032d7 | 923 | |
bebf0797 | 924 | subreg_off = bitnum / BITS_PER_UNIT; |
8408d345 JJ |
925 | if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off) |
926 | /* STRICT_LOW_PART must have a non-paradoxical subreg as | |
927 | operand. */ | |
928 | && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0))) | |
19228b93 JJ |
929 | { |
930 | arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); | |
a5c7d693 | 931 | |
19228b93 JJ |
932 | create_fixed_operand (&ops[0], arg0); |
933 | /* Shrink the source operand to FIELDMODE. */ | |
934 | create_convert_operand_to (&ops[1], value, fieldmode, false); | |
935 | if (maybe_expand_insn (icode, 2, ops)) | |
936 | return true; | |
937 | } | |
44037a66 TG |
938 | } |
939 | ||
940 | /* Handle fields bigger than a word. */ | |
941 | ||
942 | if (bitsize > BITS_PER_WORD) | |
943 | { | |
944 | /* Here we transfer the words of the field | |
945 | in the order least significant first. | |
946 | This is because the most significant word is the one which may | |
ad83e87b PB |
947 | be less than full. |
948 | However, only do that if the value is not BLKmode. */ | |
949 | ||
ee45a32d | 950 | const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; |
82f620e2 | 951 | const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
f3f6fb16 | 952 | rtx_insn *last; |
44037a66 TG |
953 | |
954 | /* This is the mode we must force value to, so that there will be enough | |
955 | subwords to extract. Note that fieldmode will often (always?) be | |
956 | VOIDmode, because that is what store_field uses to indicate that this | |
535a42b1 | 957 | is a bit field, but passing VOIDmode to operand_subword_force |
2d7b38df RS |
958 | is not allowed. |
959 | ||
960 | The mode must be fixed-size, since insertions into variable-sized | |
961 | objects are meant to be handled before calling this function. */ | |
962 | fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value)); | |
963 | if (value_mode == VOIDmode) | |
964 | value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD); | |
44037a66 | 965 | |
6d7db3c5 | 966 | last = get_last_insn (); |
82f620e2 | 967 | for (int i = 0; i < nwords; i++) |
44037a66 | 968 | { |
82f620e2 EB |
969 | /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD |
970 | except maybe for the last iteration. */ | |
971 | const unsigned HOST_WIDE_INT new_bitsize | |
972 | = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD); | |
973 | /* Bit offset from the starting bit number in the target. */ | |
974 | const unsigned int bit_offset | |
975 | = backwards ^ reverse | |
976 | ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0) | |
977 | : i * BITS_PER_WORD; | |
978 | /* Starting word number in the value. */ | |
979 | const unsigned int wordnum | |
980 | = backwards | |
981 | ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1) | |
982 | : i; | |
983 | /* The chunk of the value in word_mode. We use bit-field extraction | |
984 | in BLKmode to handle unaligned memory references and to shift the | |
985 | last chunk right on big-endian machines if need be. */ | |
986 | rtx value_word | |
987 | = fieldmode == BLKmode | |
988 | ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD, | |
989 | 1, NULL_RTX, word_mode, word_mode, false, | |
990 | NULL) | |
991 | : operand_subword_force (value, wordnum, value_mode); | |
3bdb97b8 AK |
992 | |
993 | if (!store_bit_field_1 (op0, new_bitsize, | |
1169e45d AH |
994 | bitnum + bit_offset, |
995 | bitregion_start, bitregion_end, | |
996 | word_mode, | |
ee45a32d | 997 | value_word, reverse, fallback_p)) |
6d7db3c5 RS |
998 | { |
999 | delete_insns_since (last); | |
1000 | return false; | |
1001 | } | |
44037a66 | 1002 | } |
6d7db3c5 | 1003 | return true; |
44037a66 TG |
1004 | } |
1005 | ||
4f1da2e9 RS |
1006 | /* If VALUE has a floating-point or complex mode, access it as an |
1007 | integer of the corresponding size. This can occur on a machine | |
1008 | with 64 bit registers that uses SFmode for float. It can also | |
1009 | occur for unaligned float or complex fields. */ | |
2d7b38df | 1010 | rtx orig_value = value; |
1a527092 RS |
1011 | scalar_int_mode value_mode; |
1012 | if (GET_MODE (value) == VOIDmode) | |
1013 | /* By this point we've dealt with values that are bigger than a word, | |
1014 | so word_mode is a conservatively correct choice. */ | |
1015 | value_mode = word_mode; | |
1016 | else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode)) | |
4f1da2e9 | 1017 | { |
1a527092 RS |
1018 | value_mode = int_mode_for_mode (GET_MODE (value)).require (); |
1019 | value = gen_reg_rtx (value_mode); | |
4f1da2e9 RS |
1020 | emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); |
1021 | } | |
2305bcad | 1022 | |
bebf0797 | 1023 | /* If OP0 is a multi-word register, narrow it to the affected word. |
7d790165 JJ |
1024 | If the region spans two words, defer to store_split_bit_field. |
1025 | Don't do this if op0 is a single hard register wider than word | |
1026 | such as a float or vector register. */ | |
1027 | if (!MEM_P (op0) | |
1a527092 | 1028 | && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD |
7d790165 JJ |
1029 | && (!REG_P (op0) |
1030 | || !HARD_REGISTER_P (op0) | |
a93072ca | 1031 | || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1)) |
bebf0797 | 1032 | { |
867a0126 | 1033 | if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) |
bebf0797 RS |
1034 | { |
1035 | if (!fallback_p) | |
1036 | return false; | |
1037 | ||
1a527092 RS |
1038 | store_split_bit_field (op0, op0_mode, bitsize, bitnum, |
1039 | bitregion_start, bitregion_end, | |
1040 | value, value_mode, reverse); | |
bebf0797 RS |
1041 | return true; |
1042 | } | |
1a527092 | 1043 | op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (), |
867a0126 RS |
1044 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); |
1045 | gcc_assert (op0); | |
1a527092 | 1046 | op0_mode = word_mode; |
867a0126 | 1047 | bitnum %= BITS_PER_WORD; |
bebf0797 RS |
1048 | } |
1049 | ||
1050 | /* From here on we can assume that the field to be stored in fits | |
1051 | within a word. If the destination is a register, it too fits | |
1052 | in a word. */ | |
44037a66 | 1053 | |
fcdd52b7 RS |
1054 | extraction_insn insv; |
1055 | if (!MEM_P (op0) | |
ee45a32d | 1056 | && !reverse |
fcdd52b7 | 1057 | && get_best_reg_extraction_insn (&insv, EP_insv, |
1a527092 | 1058 | GET_MODE_BITSIZE (op0_mode.require ()), |
fcdd52b7 | 1059 | fieldmode) |
1a527092 RS |
1060 | && store_bit_field_using_insv (&insv, op0, op0_mode, |
1061 | bitsize, bitnum, value, value_mode)) | |
a20556e4 | 1062 | return true; |
6d7db3c5 RS |
1063 | |
1064 | /* If OP0 is a memory, try copying it to a register and seeing if a | |
1065 | cheap register alternative is available. */ | |
ee45a32d | 1066 | if (MEM_P (op0) && !reverse) |
6d7db3c5 | 1067 | { |
f5d4f18c SL |
1068 | if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum, |
1069 | fieldmode) | |
1a527092 RS |
1070 | && store_bit_field_using_insv (&insv, op0, op0_mode, |
1071 | bitsize, bitnum, value, value_mode)) | |
17a73ba0 RS |
1072 | return true; |
1073 | ||
f3f6fb16 | 1074 | rtx_insn *last = get_last_insn (); |
6d7db3c5 | 1075 | |
fcdd52b7 RS |
1076 | /* Try loading part of OP0 into a register, inserting the bitfield |
1077 | into that, and then copying the result back to OP0. */ | |
1078 | unsigned HOST_WIDE_INT bitpos; | |
1079 | rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum, | |
1080 | bitregion_start, bitregion_end, | |
1081 | fieldmode, &bitpos); | |
1082 | if (xop0) | |
0fb7aeda | 1083 | { |
fcdd52b7 | 1084 | rtx tempreg = copy_to_reg (xop0); |
bebf0797 | 1085 | if (store_bit_field_1 (tempreg, bitsize, bitpos, |
1169e45d | 1086 | bitregion_start, bitregion_end, |
ee45a32d | 1087 | fieldmode, orig_value, reverse, false)) |
6d7db3c5 RS |
1088 | { |
1089 | emit_move_insn (xop0, tempreg); | |
1090 | return true; | |
1091 | } | |
44037a66 | 1092 | delete_insns_since (last); |
44037a66 TG |
1093 | } |
1094 | } | |
6d7db3c5 RS |
1095 | |
1096 | if (!fallback_p) | |
1097 | return false; | |
1098 | ||
1a527092 RS |
1099 | store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start, |
1100 | bitregion_end, value, value_mode, reverse); | |
6d7db3c5 RS |
1101 | return true; |
1102 | } | |
1103 | ||
1104 | /* Generate code to store value from rtx VALUE | |
1105 | into a bit-field within structure STR_RTX | |
1106 | containing BITSIZE bits starting at bit BITNUM. | |
1169e45d AH |
1107 | |
1108 | BITREGION_START is bitpos of the first bitfield in this region. | |
1109 | BITREGION_END is the bitpos of the ending bitfield in this region. | |
1110 | These two fields are 0, if the C++ memory model does not apply, | |
1111 | or we are not interested in keeping track of bitfield regions. | |
1112 | ||
ee45a32d EB |
1113 | FIELDMODE is the machine-mode of the FIELD_DECL node for this field. |
1114 | ||
1115 | If REVERSE is true, the store is to be done in reverse order. */ | |
6d7db3c5 RS |
1116 | |
1117 | void | |
2d7b38df | 1118 | store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum, |
8c59e5e7 | 1119 | poly_uint64 bitregion_start, poly_uint64 bitregion_end, |
ef4bddc2 | 1120 | machine_mode fieldmode, |
ee45a32d | 1121 | rtx value, bool reverse) |
6d7db3c5 | 1122 | { |
f5d4f18c | 1123 | /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ |
2d7b38df | 1124 | unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0; |
0ef40942 | 1125 | scalar_int_mode int_mode; |
2d7b38df RS |
1126 | if (bitsize.is_constant (&ibitsize) |
1127 | && bitnum.is_constant (&ibitnum) | |
1128 | && is_a <scalar_int_mode> (fieldmode, &int_mode) | |
1129 | && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode, | |
0ef40942 | 1130 | bitregion_start, bitregion_end)) |
f5d4f18c | 1131 | { |
b6dd42a9 BE |
1132 | /* Storing of a full word can be done with a simple store. |
1133 | We know here that the field can be accessed with one single | |
1134 | instruction. For targets that support unaligned memory, | |
1135 | an unaligned access may be necessary. */ | |
2d7b38df | 1136 | if (ibitsize == GET_MODE_BITSIZE (int_mode)) |
f5d4f18c | 1137 | { |
0ef40942 | 1138 | str_rtx = adjust_bitfield_address (str_rtx, int_mode, |
2d7b38df | 1139 | ibitnum / BITS_PER_UNIT); |
ee45a32d | 1140 | if (reverse) |
0ef40942 | 1141 | value = flip_storage_order (int_mode, value); |
2d7b38df | 1142 | gcc_assert (ibitnum % BITS_PER_UNIT == 0); |
f5d4f18c SL |
1143 | emit_move_insn (str_rtx, value); |
1144 | } | |
1145 | else | |
ebb99f96 | 1146 | { |
53c615a2 BE |
1147 | rtx temp; |
1148 | ||
2d7b38df RS |
1149 | str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, |
1150 | ibitnum, &ibitnum); | |
1151 | gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode)); | |
53c615a2 | 1152 | temp = copy_to_reg (str_rtx); |
2d7b38df | 1153 | if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0, |
0ef40942 | 1154 | int_mode, value, reverse, true)) |
53c615a2 BE |
1155 | gcc_unreachable (); |
1156 | ||
1157 | emit_move_insn (str_rtx, temp); | |
ebb99f96 BE |
1158 | } |
1159 | ||
f5d4f18c SL |
1160 | return; |
1161 | } | |
1162 | ||
1169e45d AH |
1163 | /* Under the C++0x memory model, we must not touch bits outside the |
1164 | bit region. Adjust the address to start at the beginning of the | |
1165 | bit region. */ | |
8c59e5e7 | 1166 | if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U)) |
1169e45d | 1167 | { |
ae927046 RS |
1168 | scalar_int_mode best_mode; |
1169 | machine_mode addr_mode = VOIDmode; | |
a59b038c | 1170 | |
8c59e5e7 | 1171 | poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT); |
1169e45d | 1172 | bitnum -= bitregion_start; |
2d7b38df | 1173 | poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize); |
1169e45d AH |
1174 | bitregion_end -= bitregion_start; |
1175 | bitregion_start = 0; | |
2d7b38df RS |
1176 | if (bitsize.is_constant (&ibitsize) |
1177 | && bitnum.is_constant (&ibitnum) | |
1178 | && get_best_mode (ibitsize, ibitnum, | |
1179 | bitregion_start, bitregion_end, | |
1180 | MEM_ALIGN (str_rtx), INT_MAX, | |
1181 | MEM_VOLATILE_P (str_rtx), &best_mode)) | |
ae927046 RS |
1182 | addr_mode = best_mode; |
1183 | str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode, | |
1184 | offset, size); | |
1169e45d AH |
1185 | } |
1186 | ||
1187 | if (!store_bit_field_1 (str_rtx, bitsize, bitnum, | |
1188 | bitregion_start, bitregion_end, | |
ee45a32d | 1189 | fieldmode, value, reverse, true)) |
6d7db3c5 | 1190 | gcc_unreachable (); |
44037a66 TG |
1191 | } |
1192 | \f | |
bebf0797 | 1193 | /* Use shifts and boolean operations to store VALUE into a bit field of |
1a527092 RS |
1194 | width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined, |
1195 | it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is | |
1196 | the mode of VALUE. | |
ee45a32d EB |
1197 | |
1198 | If REVERSE is true, the store is to be done in reverse order. */ | |
44037a66 TG |
1199 | |
1200 | static void | |
1a527092 RS |
1201 | store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode, |
1202 | unsigned HOST_WIDE_INT bitsize, | |
bebf0797 | 1203 | unsigned HOST_WIDE_INT bitnum, |
8c59e5e7 | 1204 | poly_uint64 bitregion_start, poly_uint64 bitregion_end, |
1a527092 | 1205 | rtx value, scalar_int_mode value_mode, bool reverse) |
44037a66 | 1206 | { |
44037a66 TG |
1207 | /* There is a case not handled here: |
1208 | a structure with a known alignment of just a halfword | |
1209 | and a field split across two aligned halfwords within the structure. | |
1210 | Or likewise a structure with a known alignment of just a byte | |
1211 | and a field split across two bytes. | |
1212 | Such cases are not supposed to be able to occur. */ | |
1213 | ||
1a527092 | 1214 | scalar_int_mode best_mode; |
bebf0797 | 1215 | if (MEM_P (op0)) |
44037a66 | 1216 | { |
1a527092 RS |
1217 | unsigned int max_bitsize = BITS_PER_WORD; |
1218 | scalar_int_mode imode; | |
1219 | if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize) | |
1220 | max_bitsize = GET_MODE_BITSIZE (imode); | |
1221 | ||
ae927046 | 1222 | if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end, |
1a527092 RS |
1223 | MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0), |
1224 | &best_mode)) | |
44037a66 TG |
1225 | { |
1226 | /* The only way this should occur is if the field spans word | |
1227 | boundaries. */ | |
1a527092 RS |
1228 | store_split_bit_field (op0, op0_mode, bitsize, bitnum, |
1229 | bitregion_start, bitregion_end, | |
1230 | value, value_mode, reverse); | |
44037a66 TG |
1231 | return; |
1232 | } | |
1233 | ||
ae927046 | 1234 | op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum); |
44037a66 | 1235 | } |
1a527092 RS |
1236 | else |
1237 | best_mode = op0_mode.require (); | |
44037a66 | 1238 | |
1a527092 RS |
1239 | store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum, |
1240 | value, value_mode, reverse); | |
ebb99f96 BE |
1241 | } |
1242 | ||
1243 | /* Helper function for store_fixed_bit_field, stores | |
1a527092 RS |
1244 | the bit field always using MODE, which is the mode of OP0. The other |
1245 | arguments are as for store_fixed_bit_field. */ | |
ebb99f96 BE |
1246 | |
1247 | static void | |
1a527092 RS |
1248 | store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode, |
1249 | unsigned HOST_WIDE_INT bitsize, | |
548cfdc2 | 1250 | unsigned HOST_WIDE_INT bitnum, |
1a527092 | 1251 | rtx value, scalar_int_mode value_mode, bool reverse) |
ebb99f96 | 1252 | { |
ebb99f96 BE |
1253 | rtx temp; |
1254 | int all_zero = 0; | |
1255 | int all_one = 0; | |
1256 | ||
bebf0797 RS |
1257 | /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) |
1258 | for invalid input, such as f5 from gcc.dg/pr48335-2.c. */ | |
44037a66 | 1259 | |
ee45a32d | 1260 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
bebf0797 RS |
1261 | /* BITNUM is the distance between our msb |
1262 | and that of the containing datum. | |
1263 | Convert it to the distance from the lsb. */ | |
1264 | bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; | |
44037a66 | 1265 | |
bebf0797 | 1266 | /* Now BITNUM is always the distance between our lsb |
44037a66 TG |
1267 | and that of OP0. */ |
1268 | ||
bebf0797 | 1269 | /* Shift VALUE left by BITNUM bits. If VALUE is not constant, |
44037a66 TG |
1270 | we must first convert its mode to MODE. */ |
1271 | ||
481683e1 | 1272 | if (CONST_INT_P (value)) |
44037a66 | 1273 | { |
e507a433 | 1274 | unsigned HOST_WIDE_INT v = UINTVAL (value); |
44037a66 | 1275 | |
b1ec3c92 | 1276 | if (bitsize < HOST_BITS_PER_WIDE_INT) |
fecfbfa4 | 1277 | v &= (HOST_WIDE_INT_1U << bitsize) - 1; |
44037a66 TG |
1278 | |
1279 | if (v == 0) | |
1280 | all_zero = 1; | |
b1ec3c92 | 1281 | else if ((bitsize < HOST_BITS_PER_WIDE_INT |
fecfbfa4 | 1282 | && v == (HOST_WIDE_INT_1U << bitsize) - 1) |
e507a433 | 1283 | || (bitsize == HOST_BITS_PER_WIDE_INT |
fecfbfa4 | 1284 | && v == HOST_WIDE_INT_M1U)) |
44037a66 TG |
1285 | all_one = 1; |
1286 | ||
088c5368 | 1287 | value = lshift_value (mode, v, bitnum); |
44037a66 TG |
1288 | } |
1289 | else | |
1290 | { | |
1a527092 | 1291 | int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize |
bebf0797 | 1292 | && bitnum + bitsize != GET_MODE_BITSIZE (mode)); |
44037a66 | 1293 | |
1a527092 | 1294 | if (value_mode != mode) |
86cfb27a | 1295 | value = convert_to_mode (mode, value, 1); |
44037a66 TG |
1296 | |
1297 | if (must_and) | |
1298 | value = expand_binop (mode, and_optab, value, | |
1299 | mask_rtx (mode, 0, bitsize, 0), | |
b1ec3c92 | 1300 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
bebf0797 | 1301 | if (bitnum > 0) |
44037a66 | 1302 | value = expand_shift (LSHIFT_EXPR, mode, value, |
bebf0797 | 1303 | bitnum, NULL_RTX, 1); |
44037a66 TG |
1304 | } |
1305 | ||
ee45a32d EB |
1306 | if (reverse) |
1307 | value = flip_storage_order (mode, value); | |
1308 | ||
44037a66 TG |
1309 | /* Now clear the chosen bits in OP0, |
1310 | except that if VALUE is -1 we need not bother. */ | |
c505fc06 RS |
1311 | /* We keep the intermediates in registers to allow CSE to combine |
1312 | consecutive bitfield assignments. */ | |
44037a66 | 1313 | |
c505fc06 | 1314 | temp = force_reg (mode, op0); |
44037a66 TG |
1315 | |
1316 | if (! all_one) | |
1317 | { | |
ee45a32d EB |
1318 | rtx mask = mask_rtx (mode, bitnum, bitsize, 1); |
1319 | if (reverse) | |
1320 | mask = flip_storage_order (mode, mask); | |
1321 | temp = expand_binop (mode, and_optab, temp, mask, | |
c505fc06 RS |
1322 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
1323 | temp = force_reg (mode, temp); | |
44037a66 | 1324 | } |
44037a66 TG |
1325 | |
1326 | /* Now logical-or VALUE into OP0, unless it is zero. */ | |
1327 | ||
1328 | if (! all_zero) | |
c505fc06 RS |
1329 | { |
1330 | temp = expand_binop (mode, ior_optab, temp, value, | |
1331 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
1332 | temp = force_reg (mode, temp); | |
1333 | } | |
1334 | ||
44037a66 | 1335 | if (op0 != temp) |
4679504c UB |
1336 | { |
1337 | op0 = copy_rtx (op0); | |
1338 | emit_move_insn (op0, temp); | |
1339 | } | |
44037a66 TG |
1340 | } |
1341 | \f | |
06c94bce | 1342 | /* Store a bit field that is split across multiple accessible memory objects. |
44037a66 | 1343 | |
06c94bce | 1344 | OP0 is the REG, SUBREG or MEM rtx for the first of the objects. |
44037a66 TG |
1345 | BITSIZE is the field width; BITPOS the position of its first bit |
1346 | (within the word). | |
1a527092 RS |
1347 | VALUE is the value to store, which has mode VALUE_MODE. |
1348 | If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is | |
1349 | a BLKmode MEM. | |
06c94bce | 1350 | |
ee45a32d EB |
1351 | If REVERSE is true, the store is to be done in reverse order. |
1352 | ||
06c94bce | 1353 | This does not yet handle fields wider than BITS_PER_WORD. */ |
44037a66 TG |
1354 | |
1355 | static void | |
1a527092 RS |
1356 | store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode, |
1357 | unsigned HOST_WIDE_INT bitsize, | |
1169e45d | 1358 | unsigned HOST_WIDE_INT bitpos, |
8c59e5e7 | 1359 | poly_uint64 bitregion_start, poly_uint64 bitregion_end, |
1a527092 | 1360 | rtx value, scalar_int_mode value_mode, bool reverse) |
44037a66 | 1361 | { |
ee45a32d | 1362 | unsigned int unit, total_bits, bitsdone = 0; |
4ee16841 | 1363 | |
0eb61c19 DE |
1364 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
1365 | much at a time. */ | |
f8cfc6aa | 1366 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
4ee16841 DE |
1367 | unit = BITS_PER_WORD; |
1368 | else | |
04050c69 | 1369 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
e54d80d0 | 1370 | |
ebb99f96 BE |
1371 | /* If OP0 is a memory with a mode, then UNIT must not be larger than |
1372 | OP0's mode as well. Otherwise, store_fixed_bit_field will call us | |
1373 | again, and we will mutually recurse forever. */ | |
1a527092 RS |
1374 | if (MEM_P (op0) && op0_mode.exists ()) |
1375 | unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ())); | |
ebb99f96 | 1376 | |
3d709ff0 RS |
1377 | /* If VALUE is a constant other than a CONST_INT, get it into a register in |
1378 | WORD_MODE. If we can do this using gen_lowpart_common, do so. Note | |
1379 | that VALUE might be a floating-point constant. */ | |
481683e1 | 1380 | if (CONSTANT_P (value) && !CONST_INT_P (value)) |
3d709ff0 RS |
1381 | { |
1382 | rtx word = gen_lowpart_common (word_mode, value); | |
1383 | ||
bc8a0e39 | 1384 | if (word && (value != word)) |
3d709ff0 RS |
1385 | value = word; |
1386 | else | |
1a527092 RS |
1387 | value = gen_lowpart_common (word_mode, force_reg (value_mode, value)); |
1388 | value_mode = word_mode; | |
3d709ff0 | 1389 | } |
44037a66 | 1390 | |
1a527092 | 1391 | total_bits = GET_MODE_BITSIZE (value_mode); |
ee45a32d | 1392 | |
06c94bce | 1393 | while (bitsdone < bitsize) |
44037a66 | 1394 | { |
770ae6cc | 1395 | unsigned HOST_WIDE_INT thissize; |
770ae6cc RK |
1396 | unsigned HOST_WIDE_INT thispos; |
1397 | unsigned HOST_WIDE_INT offset; | |
1a527092 | 1398 | rtx part; |
44037a66 | 1399 | |
06c94bce RS |
1400 | offset = (bitpos + bitsdone) / unit; |
1401 | thispos = (bitpos + bitsdone) % unit; | |
44037a66 | 1402 | |
f1cc9589 | 1403 | /* When region of bytes we can touch is restricted, decrease |
bd3647bf JJ |
1404 | UNIT close to the end of the region as needed. If op0 is a REG |
1405 | or SUBREG of REG, don't do this, as there can't be data races | |
1406 | on a register and we can expand shorter code in some cases. */ | |
8c59e5e7 | 1407 | if (maybe_ne (bitregion_end, 0U) |
f1cc9589 | 1408 | && unit > BITS_PER_UNIT |
8c59e5e7 | 1409 | && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1) |
bd3647bf JJ |
1410 | && !REG_P (op0) |
1411 | && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0)))) | |
f1cc9589 JJ |
1412 | { |
1413 | unit = unit / 2; | |
1414 | continue; | |
1415 | } | |
1416 | ||
0eb61c19 DE |
1417 | /* THISSIZE must not overrun a word boundary. Otherwise, |
1418 | store_fixed_bit_field will call us again, and we will mutually | |
1419 | recurse forever. */ | |
1420 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); | |
1421 | thissize = MIN (thissize, unit - thispos); | |
44037a66 | 1422 | |
ee45a32d | 1423 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
f76b9db2 ILT |
1424 | { |
1425 | /* Fetch successively less significant portions. */ | |
481683e1 | 1426 | if (CONST_INT_P (value)) |
f76b9db2 ILT |
1427 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1428 | >> (bitsize - bitsdone - thissize)) | |
fecfbfa4 | 1429 | & ((HOST_WIDE_INT_1 << thissize) - 1)); |
ee45a32d EB |
1430 | /* Likewise, but the source is little-endian. */ |
1431 | else if (reverse) | |
1a527092 RS |
1432 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1433 | thissize, | |
ee45a32d EB |
1434 | bitsize - bitsdone - thissize, |
1435 | NULL_RTX, 1, false); | |
f76b9db2 | 1436 | else |
1a527092 RS |
1437 | /* The args are chosen so that the last part includes the |
1438 | lsb. Give extract_bit_field the value it needs (with | |
1439 | endianness compensation) to fetch the piece we want. */ | |
1440 | part = extract_fixed_bit_field (word_mode, value, value_mode, | |
1441 | thissize, | |
1442 | total_bits - bitsize + bitsdone, | |
1443 | NULL_RTX, 1, false); | |
f76b9db2 | 1444 | } |
06c94bce | 1445 | else |
f76b9db2 ILT |
1446 | { |
1447 | /* Fetch successively more significant portions. */ | |
481683e1 | 1448 | if (CONST_INT_P (value)) |
f76b9db2 ILT |
1449 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1450 | >> bitsdone) | |
fecfbfa4 | 1451 | & ((HOST_WIDE_INT_1 << thissize) - 1)); |
ee45a32d EB |
1452 | /* Likewise, but the source is big-endian. */ |
1453 | else if (reverse) | |
1a527092 RS |
1454 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1455 | thissize, | |
ee45a32d EB |
1456 | total_bits - bitsdone - thissize, |
1457 | NULL_RTX, 1, false); | |
f76b9db2 | 1458 | else |
1a527092 RS |
1459 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1460 | thissize, bitsdone, NULL_RTX, | |
1461 | 1, false); | |
f76b9db2 | 1462 | } |
44037a66 | 1463 | |
867a0126 | 1464 | /* If OP0 is a register, then handle OFFSET here. */ |
1a527092 RS |
1465 | rtx op0_piece = op0; |
1466 | opt_scalar_int_mode op0_piece_mode = op0_mode; | |
867a0126 | 1467 | if (SUBREG_P (op0) || REG_P (op0)) |
06c94bce | 1468 | { |
1a527092 RS |
1469 | scalar_int_mode imode; |
1470 | if (op0_mode.exists (&imode) | |
1471 | && GET_MODE_SIZE (imode) < UNITS_PER_WORD) | |
1472 | { | |
1473 | if (offset) | |
1474 | op0_piece = const0_rtx; | |
1475 | } | |
19228b93 | 1476 | else |
1a527092 RS |
1477 | { |
1478 | op0_piece = operand_subword_force (op0, | |
1479 | offset * unit / BITS_PER_WORD, | |
1480 | GET_MODE (op0)); | |
1481 | op0_piece_mode = word_mode; | |
1482 | } | |
bd3647bf | 1483 | offset &= BITS_PER_WORD / unit - 1; |
06c94bce | 1484 | } |
44037a66 | 1485 | |
bebf0797 | 1486 | /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx, |
19228b93 | 1487 | it is just an out-of-bounds access. Ignore it. */ |
1a527092 RS |
1488 | if (op0_piece != const0_rtx) |
1489 | store_fixed_bit_field (op0_piece, op0_piece_mode, thissize, | |
1490 | offset * unit + thispos, bitregion_start, | |
1491 | bitregion_end, part, word_mode, reverse); | |
06c94bce RS |
1492 | bitsdone += thissize; |
1493 | } | |
44037a66 TG |
1494 | } |
1495 | \f | |
6d7db3c5 RS |
1496 | /* A subroutine of extract_bit_field_1 that converts return value X |
1497 | to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments | |
1498 | to extract_bit_field. */ | |
44037a66 | 1499 | |
6d7db3c5 | 1500 | static rtx |
ef4bddc2 RS |
1501 | convert_extracted_bit_field (rtx x, machine_mode mode, |
1502 | machine_mode tmode, bool unsignedp) | |
6d7db3c5 RS |
1503 | { |
1504 | if (GET_MODE (x) == tmode || GET_MODE (x) == mode) | |
1505 | return x; | |
44037a66 | 1506 | |
6d7db3c5 RS |
1507 | /* If the x mode is not a scalar integral, first convert to the |
1508 | integer mode of that size and then access it as a floating-point | |
1509 | value via a SUBREG. */ | |
1510 | if (!SCALAR_INT_MODE_P (tmode)) | |
1511 | { | |
304b9962 | 1512 | scalar_int_mode int_mode = int_mode_for_mode (tmode).require (); |
fffbab82 RS |
1513 | x = convert_to_mode (int_mode, x, unsignedp); |
1514 | x = force_reg (int_mode, x); | |
6d7db3c5 RS |
1515 | return gen_lowpart (tmode, x); |
1516 | } | |
44037a66 | 1517 | |
6d7db3c5 RS |
1518 | return convert_to_mode (tmode, x, unsignedp); |
1519 | } | |
1520 | ||
a20556e4 RS |
/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXTV describes the extraction instruction to use.  If OP0_MODE
   is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
   The other arguments are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      opt_scalar_int_mode op0_mode,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
{
  class expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  scalar_int_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  /* The pattern produces an EXT_MODE value, so it cannot handle an
     empty field or one wider than EXT_MODE.  */
  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
	return NULL_RTX;
      if (REG_P (op0) && op0_mode.require () != ext_mode)
	op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  /* The pattern writes an EXT_MODE result; arrange for TARGET to have
     that mode, ideally without introducing an extra conversion.  */
  if (GET_MODE (target) != ext_mode)
    {
      rtx temp;
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
	 between the mode of the extraction (word_mode) and the target
	 mode.  Instead, create a temporary and use convert_move to set
	 the target.  */
      if (REG_P (target)
	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
	  && (temp = gen_lowpart_if_possible (ext_mode, target)))
	{
	  target = temp;
	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
	    spec_target_subreg = target;
	}
      else
	target = gen_reg_rtx (ext_mode);
    }

  /* Operands of ext(z)v: 0 = result, 1 = structure to extract from,
     2 = field width in bits, 3 = starting bit position.  */
  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      /* If the result landed in the caller's TARGET (or in the
	 EXT_MODE lowpart of it that we made above), no conversion
	 is needed.  */
      if (target == spec_target)
	return target;
      if (target == spec_target_subreg)
	return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}
1607 | ||
fc60a416 RS |
1608 | /* See whether it would be valid to extract the part of OP0 described |
1609 | by BITNUM and BITSIZE into a value of mode MODE using a subreg | |
1610 | operation. Return the subreg if so, otherwise return null. */ | |
1611 | ||
1612 | static rtx | |
1613 | extract_bit_field_as_subreg (machine_mode mode, rtx op0, | |
1614 | poly_uint64 bitsize, poly_uint64 bitnum) | |
1615 | { | |
1616 | poly_uint64 bytenum; | |
1617 | if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum) | |
1618 | && known_eq (bitsize, GET_MODE_BITSIZE (mode)) | |
1619 | && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) | |
1620 | && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0))) | |
1621 | return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum); | |
1622 | return NULL_RTX; | |
1623 | } | |
1624 | ||
6d7db3c5 RS |
/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   if FALLBACK_P is false, return NULL instead.  */

static rtx
extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		     int unsignedp, rtx target, machine_mode mode,
		     machine_mode tmode, bool reverse, bool fallback_p,
		     rtx *alt_rtl)
{
  rtx op0 = str_rtx;
  machine_mode mode1;

  if (tmode == VOIDmode)
    tmode = mode;

  /* Strip SUBREGs, folding their byte offset into BITNUM.  */
  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && known_eq (bitnum, 0U)
      && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
    {
      if (reverse)
	op0 = flip_storage_order (mode, op0);
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* First try to check for vector from vector extractions.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && VECTOR_MODE_P (tmode)
      && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
      && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
    {
      machine_mode new_mode = GET_MODE (op0);
      if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
	{
	  /* The element modes differ; look for a same-size vector mode
	     with TMODE's element mode.  */
	  scalar_mode inner_mode = GET_MODE_INNER (tmode);
	  poly_uint64 nunits;
	  if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
			   GET_MODE_UNIT_BITSIZE (tmode), &nunits)
	      || !related_vector_mode (tmode, inner_mode,
				       nunits).exists (&new_mode)
	      || maybe_ne (GET_MODE_SIZE (new_mode),
			   GET_MODE_SIZE (GET_MODE (op0))))
	    new_mode = VOIDmode;
	}
      poly_uint64 pos;
      if (new_mode != VOIDmode
	  && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
	      != CODE_FOR_nothing)
	  && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
	{
	  class expand_operand ops[3];
	  machine_mode outermode = new_mode;
	  machine_mode innermode = tmode;
	  enum insn_code icode
	    = convert_optab_handler (vec_extract_optab, outermode, innermode);

	  if (new_mode != GET_MODE (op0))
	    op0 = gen_lowpart (new_mode, op0);
	  create_output_operand (&ops[0], target, innermode);
	  ops[0].target = 1;
	  create_input_operand (&ops[1], op0, outermode);
	  create_integer_operand (&ops[2], pos);
	  if (maybe_expand_insn (icode, 3, ops))
	    {
	      if (alt_rtl && ops[0].target)
		*alt_rtl = target;
	      target = ops[0].value;
	      if (GET_MODE (target) != mode)
		return gen_lowpart (tmode, target);
	      return target;
	    }
	}
    }

  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      machine_mode new_mode;

      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
	new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
	new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
	new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
	new_mode = MIN_MODE_VECTOR_UACCUM;
      else
	new_mode = MIN_MODE_VECTOR_INT;

      /* Look for a same-size vector mode whose element size matches
	 TMODE's size and which the target supports and can tie to
	 OP0's mode.  */
      FOR_EACH_MODE_FROM (new_mode, new_mode)
	if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
	    && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
	    && targetm.vector_mode_supported_p (new_mode)
	    && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
	  break;
      if (new_mode != VOIDmode)
	op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  If that fails, see whether the current modes and bitregion
     give a natural subreg.  */
  machine_mode outermode = GET_MODE (op0);
  if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
    {
      scalar_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode
	= convert_optab_handler (vec_extract_optab, outermode, innermode);
      poly_uint64 pos;
      if (icode != CODE_FOR_nothing
	  && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
	  && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
	{
	  class expand_operand ops[3];

	  create_output_operand (&ops[0], target, innermode);
	  ops[0].target = 1;
	  create_input_operand (&ops[1], op0, outermode);
	  create_integer_operand (&ops[2], pos);
	  if (maybe_expand_insn (icode, 3, ops))
	    {
	      if (alt_rtl && ops[0].target)
		*alt_rtl = target;
	      target = ops[0].value;
	      if (GET_MODE (target) != mode)
		return gen_lowpart (tmode, target);
	      return target;
	    }
	}
      /* Using subregs is useful if we're extracting one register vector
	 from a multi-register vector.  extract_bit_field_as_subreg checks
	 for valid bitsize and bitnum, so we don't need to do that here.  */
      if (VECTOR_MODE_P (mode))
	{
	  rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
	  if (sub)
	    return sub;
	}
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
  scalar_int_mode imode;
  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
    {
      if (MEM_P (op0))
	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
					    0, MEM_SIZE (op0));
      else if (op0_mode.exists (&imode))
	{
	  op0 = gen_lowpart (imode, op0);

	  /* If we got a SUBREG, force it into a register since we
	     aren't going to be able to do another SUBREG on it.  */
	  if (GET_CODE (op0) == SUBREG)
	    op0 = force_reg (imode, op0);
	}
      else
	{
	  /* No integer mode of the same size exists; spill OP0 to a
	     stack temporary and access it as a BLKmode MEM.  */
	  poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
	  rtx mem = assign_stack_temp (GET_MODE (op0), size);
	  emit_move_insn (mem, op0);
	  op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
	}
    }

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Get the mode of the field to use for atomic access or subreg
     conversion.  */
  if (!SCALAR_INT_MODE_P (tmode)
      || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
    mode1 = mode;
  gcc_assert (mode1 != BLKmode);

  /* Extraction of a full MODE1 value can be done with a subreg as long
     as the least significant bit of the value is the least significant
     bit of either OP0 or a word of OP0.  */
  if (!MEM_P (op0) && !reverse)
    {
      rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
      if (sub)
	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
    }

  /* Extraction of a full MODE1 value can be done with a load as long as
     the field is on a byte boundary and is sufficiently aligned.  */
  poly_uint64 bytenum;
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
    {
      op0 = adjust_bitfield_address (op0, mode1, bytenum);
      if (reverse)
	op0 = flip_storage_order (mode1, op0);
      return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
    }

  /* If we have a memory source and a non-constant bit offset, restrict
     the memory to the referenced bytes.  This is a worst-case fallback
     but is useful for things like vector booleans.  */
  if (MEM_P (op0) && !bitnum.is_constant ())
    {
      bytenum = bits_to_bytes_round_down (bitnum);
      bitnum = num_trailing_bits (bitnum);
      poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
      op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
      op0_mode = opt_scalar_int_mode ();
    }

  /* It's possible we'll need to handle other cases here for
     polynomial bitnum and bitsize.  */

  /* From here on we need to be looking at a fixed-size insertion.  */
  return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
				     bitnum.to_constant (), unsignedp,
				     target, mode, tmode, reverse, fallback_p);
}
1865 | ||
1866 | /* Subroutine of extract_bit_field_1, with the same arguments, except | |
1867 | that BITSIZE and BITNUM are constant. Handle cases specific to | |
1868 | integral modes. If OP0_MODE is defined, it is the mode of OP0, | |
1869 | otherwise OP0 is a BLKmode MEM. */ | |
1870 | ||
1871 | static rtx | |
1872 | extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode, | |
1873 | unsigned HOST_WIDE_INT bitsize, | |
1874 | unsigned HOST_WIDE_INT bitnum, int unsignedp, | |
1875 | rtx target, machine_mode mode, machine_mode tmode, | |
1876 | bool reverse, bool fallback_p) | |
1877 | { | |
44037a66 | 1878 | /* Handle fields bigger than a word. */ |
c410d49e | 1879 | |
44037a66 TG |
1880 | if (bitsize > BITS_PER_WORD) |
1881 | { | |
1882 | /* Here we transfer the words of the field | |
1883 | in the order least significant first. | |
1884 | This is because the most significant word is the one which may | |
1885 | be less than full. */ | |
1886 | ||
ee45a32d | 1887 | const bool backwards = WORDS_BIG_ENDIAN; |
770ae6cc RK |
1888 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
1889 | unsigned int i; | |
f3f6fb16 | 1890 | rtx_insn *last; |
44037a66 | 1891 | |
02972eaf | 1892 | if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) |
44037a66 TG |
1893 | target = gen_reg_rtx (mode); |
1894 | ||
7d21a61e DD |
1895 | /* In case we're about to clobber a base register or something |
1896 | (see gcc.c-torture/execute/20040625-1.c). */ | |
fc60a416 | 1897 | if (reg_mentioned_p (target, op0)) |
7d21a61e DD |
1898 | target = gen_reg_rtx (mode); |
1899 | ||
34ea783b | 1900 | /* Indicate for flow that the entire target reg is being set. */ |
c41c1387 | 1901 | emit_clobber (target); |
34ea783b | 1902 | |
fc60a416 RS |
1903 | /* The mode must be fixed-size, since extract_bit_field_1 handles |
1904 | extractions from variable-sized objects before calling this | |
1905 | function. */ | |
cf098191 RS |
1906 | unsigned int target_size |
1907 | = GET_MODE_SIZE (GET_MODE (target)).to_constant (); | |
5ef0b50d | 1908 | last = get_last_insn (); |
44037a66 TG |
1909 | for (i = 0; i < nwords; i++) |
1910 | { | |
1911 | /* If I is 0, use the low-order word in both field and target; | |
1912 | if I is 1, use the next to lowest word; and so on. */ | |
77295dec | 1913 | /* Word number in TARGET to use. */ |
770ae6cc | 1914 | unsigned int wordnum |
fc60a416 | 1915 | = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i); |
77295dec | 1916 | /* Offset from start of field in OP0. */ |
ee45a32d | 1917 | unsigned int bit_offset = (backwards ^ reverse |
0cd9e9ee EB |
1918 | ? MAX ((int) bitsize - ((int) i + 1) |
1919 | * BITS_PER_WORD, | |
1920 | 0) | |
770ae6cc | 1921 | : (int) i * BITS_PER_WORD); |
44037a66 TG |
1922 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
1923 | rtx result_part | |
5ef0b50d EB |
1924 | = extract_bit_field_1 (op0, MIN (BITS_PER_WORD, |
1925 | bitsize - i * BITS_PER_WORD), | |
c6285bd7 | 1926 | bitnum + bit_offset, 1, target_part, |
f96bf49a | 1927 | mode, word_mode, reverse, fallback_p, NULL); |
44037a66 | 1928 | |
5b0264cb | 1929 | gcc_assert (target_part); |
5ef0b50d EB |
1930 | if (!result_part) |
1931 | { | |
1932 | delete_insns_since (last); | |
1933 | return NULL; | |
1934 | } | |
44037a66 TG |
1935 | |
1936 | if (result_part != target_part) | |
1937 | emit_move_insn (target_part, result_part); | |
1938 | } | |
1939 | ||
5f57dff0 | 1940 | if (unsignedp) |
77295dec DE |
1941 | { |
1942 | /* Unless we've filled TARGET, the upper regs in a multi-reg value | |
1943 | need to be zero'd out. */ | |
fc60a416 | 1944 | if (target_size > nwords * UNITS_PER_WORD) |
77295dec | 1945 | { |
770ae6cc | 1946 | unsigned int i, total_words; |
77295dec | 1947 | |
fc60a416 | 1948 | total_words = target_size / UNITS_PER_WORD; |
77295dec | 1949 | for (i = nwords; i < total_words; i++) |
04050c69 RK |
1950 | emit_move_insn |
1951 | (operand_subword (target, | |
0cd9e9ee | 1952 | backwards ? total_words - i - 1 : i, |
04050c69 RK |
1953 | 1, VOIDmode), |
1954 | const0_rtx); | |
77295dec DE |
1955 | } |
1956 | return target; | |
1957 | } | |
1958 | ||
5f57dff0 JW |
1959 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
1960 | target = expand_shift (LSHIFT_EXPR, mode, target, | |
eb6c3df1 | 1961 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
5f57dff0 | 1962 | return expand_shift (RSHIFT_EXPR, mode, target, |
eb6c3df1 | 1963 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
44037a66 | 1964 | } |
c410d49e | 1965 | |
b8ab7fc8 RS |
1966 | /* If OP0 is a multi-word register, narrow it to the affected word. |
1967 | If the region spans two words, defer to extract_split_bit_field. */ | |
1a527092 | 1968 | if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD) |
44037a66 | 1969 | { |
867a0126 | 1970 | if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) |
470032d7 | 1971 | { |
b8ab7fc8 RS |
1972 | if (!fallback_p) |
1973 | return NULL_RTX; | |
1a527092 RS |
1974 | target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum, |
1975 | unsignedp, reverse); | |
b8ab7fc8 | 1976 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); |
470032d7 | 1977 | } |
1a527092 | 1978 | op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (), |
867a0126 | 1979 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); |
1a527092 | 1980 | op0_mode = word_mode; |
867a0126 | 1981 | bitnum %= BITS_PER_WORD; |
44037a66 | 1982 | } |
44037a66 | 1983 | |
b8ab7fc8 RS |
1984 | /* From here on we know the desired field is smaller than a word. |
1985 | If OP0 is a register, it too fits within a word. */ | |
fcdd52b7 RS |
1986 | enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; |
1987 | extraction_insn extv; | |
1988 | if (!MEM_P (op0) | |
ee45a32d | 1989 | && !reverse |
c0a8a3e6 RS |
1990 | /* ??? We could limit the structure size to the part of OP0 that |
1991 | contains the field, with appropriate checks for endianness | |
bb149ca2 | 1992 | and TARGET_TRULY_NOOP_TRUNCATION. */ |
c0a8a3e6 | 1993 | && get_best_reg_extraction_insn (&extv, pattern, |
1a527092 | 1994 | GET_MODE_BITSIZE (op0_mode.require ()), |
fcdd52b7 | 1995 | tmode)) |
44037a66 | 1996 | { |
1a527092 RS |
1997 | rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode, |
1998 | bitsize, bitnum, | |
a20556e4 | 1999 | unsignedp, target, mode, |
fcdd52b7 | 2000 | tmode); |
a20556e4 RS |
2001 | if (result) |
2002 | return result; | |
44037a66 | 2003 | } |
f76b9db2 | 2004 | |
6d7db3c5 RS |
2005 | /* If OP0 is a memory, try copying it to a register and seeing if a |
2006 | cheap register alternative is available. */ | |
ee45a32d | 2007 | if (MEM_P (op0) & !reverse) |
6d7db3c5 | 2008 | { |
f5d4f18c SL |
2009 | if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, |
2010 | tmode)) | |
17a73ba0 | 2011 | { |
1a527092 RS |
2012 | rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode, |
2013 | bitsize, bitnum, | |
2014 | unsignedp, target, mode, | |
fcdd52b7 | 2015 | tmode); |
17a73ba0 RS |
2016 | if (result) |
2017 | return result; | |
2018 | } | |
2019 | ||
f3f6fb16 | 2020 | rtx_insn *last = get_last_insn (); |
f76b9db2 | 2021 | |
fcdd52b7 RS |
2022 | /* Try loading part of OP0 into a register and extracting the |
2023 | bitfield from that. */ | |
2024 | unsigned HOST_WIDE_INT bitpos; | |
2025 | rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, | |
2026 | 0, 0, tmode, &bitpos); | |
2027 | if (xop0) | |
6d7db3c5 | 2028 | { |
fcdd52b7 RS |
2029 | xop0 = copy_to_reg (xop0); |
2030 | rtx result = extract_bit_field_1 (xop0, bitsize, bitpos, | |
c6285bd7 | 2031 | unsignedp, target, |
f96bf49a | 2032 | mode, tmode, reverse, false, NULL); |
fcdd52b7 RS |
2033 | if (result) |
2034 | return result; | |
2035 | delete_insns_since (last); | |
c410d49e | 2036 | } |
44037a66 | 2037 | } |
562fc702 | 2038 | |
6d7db3c5 RS |
2039 | if (!fallback_p) |
2040 | return NULL; | |
2041 | ||
508fa61b | 2042 | /* Find a correspondingly-sized integer field, so we can apply |
2043 | shifts and masks to it. */ | |
2044 | scalar_int_mode int_mode; | |
2045 | if (!int_mode_for_mode (tmode).exists (&int_mode)) | |
2046 | /* If this fails, we should probably push op0 out to memory and then | |
2047 | do a load. */ | |
2048 | int_mode = int_mode_for_mode (mode).require (); | |
2049 | ||
2050 | target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize, | |
2051 | bitnum, target, unsignedp, reverse); | |
2052 | ||
2053 | /* Complex values must be reversed piecewise, so we need to undo the global | |
2054 | reversal, convert to the complex mode and reverse again. */ | |
2055 | if (reverse && COMPLEX_MODE_P (tmode)) | |
2056 | { | |
2057 | target = flip_storage_order (int_mode, target); | |
2058 | target = convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
2059 | target = flip_storage_order (tmode, target); | |
2060 | } | |
2061 | else | |
2062 | target = convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
2063 | ||
2064 | return target; | |
6d7db3c5 RS |
2065 | } |
2066 | ||
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If REVERSE is true, the extraction is to be done in reverse order.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.

   If the result can be stored at TARGET, and ALT_RTL is non-NULL,
   then *ALT_RTL is set to TARGET (before legitimization).  */

rtx
extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		   int unsignedp, rtx target, machine_mode mode,
		   machine_mode tmode, bool reverse, rtx *alt_rtl)
{
  machine_mode mode1;

  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  /* Pick the mode used for the strict_volatile_bitfield_p check below:
     prefer STR_RTX's mode, then TARGET's, then fall back to TMODE.  */
  if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
    mode1 = GET_MODE (str_rtx);
  else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
    mode1 = GET_MODE (target);
  else
    mode1 = tmode;

  /* The strict-volatile-bitfields path only applies when the bit
     position and size are compile-time constants.  */
  unsigned HOST_WIDE_INT ibitsize, ibitnum;
  scalar_int_mode int_mode;
  if (bitsize.is_constant (&ibitsize)
      && bitnum.is_constant (&ibitnum)
      && is_a <scalar_int_mode> (mode1, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
				     int_mode, 0, 0))
    {
      /* Extraction of a full INT_MODE value can be done with a simple load.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (ibitsize == GET_MODE_BITSIZE (int_mode))
	{
	  rtx result = adjust_bitfield_address (str_rtx, int_mode,
						ibitnum / BITS_PER_UNIT);
	  if (reverse)
	    result = flip_storage_order (int_mode, result);
	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
	}

      /* Narrow the memory to the containing INT_MODE chunk, load that
	 into a register, and extract the field from the register.  */
      str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
				      &ibitnum);
      gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
      str_rtx = copy_to_reg (str_rtx);
      return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
				  target, mode, tmode, reverse, true, alt_rtl);
    }

  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
			      target, mode, tmode, reverse, true, alt_rtl);
}
2136 | \f | |
/* Use shifts and boolean operations to extract a field of BITSIZE bits
   from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
   otherwise OP0 is a BLKmode MEM.

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If REVERSE is true, the extraction is to be done in reverse order.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
44037a66 TG |
2147 | |
static rtx
extract_fixed_bit_field (machine_mode tmode, rtx op0,
			 opt_scalar_int_mode op0_mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum, rtx target,
			 int unsignedp, bool reverse)
{
  scalar_int_mode mode;
  if (MEM_P (op0))
    {
      /* Find a suitable integer mode for the access; BITNUM is updated by
	 narrow_bit_field_mem to be relative to the narrowed reference.  */
      if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
			  BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
					unsignedp, reverse);

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }
  else
    mode = op0_mode.require ();

  return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
				    target, unsignedp, reverse);
}
2173 | ||
/* Helper function for extract_fixed_bit_field, extracts
   the bit field always using MODE, which is the mode of OP0.
   The other arguments are as for extract_fixed_bit_field.  */
6f4e9cf8 BE |
2177 | |
static rtx
extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
			   unsigned HOST_WIDE_INT bitsize,
			   unsigned HOST_WIDE_INT bitnum, rtx target,
			   int unsignedp, bool reverse)
{
  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as extract equivalent of f5 from
     gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (reverse)
    op0 = flip_storage_order (mode, op0);

  if (unsignedp)
    {
      if (bitnum)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  /* Maybe propagate the target for the shift.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  if (tmode != mode)
	    subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
	}
      /* Convert the value to the desired mode.  TMODE must also be a
	 scalar integer for this conversion to make sense, since we
	 shouldn't reinterpret the bits.  */
      scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
      if (mode != new_mode)
	op0 = convert_to_mode (new_mode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
	return expand_binop (new_mode, and_optab, op0,
			     mask_rtx (new_mode, 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */

  opt_scalar_int_mode mode_iter;
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
    if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
      break;

  mode = mode_iter.require ();
  op0 = convert_to_mode (mode, op0, 0);

  if (mode != tmode)
    target = 0;

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  /* Arithmetic shift right to sign-extend the BITSIZE low bits.  */
  return expand_shift (RSHIFT_EXPR, mode, op0,
		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
44037a66 TG |
2255 | |
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE << BITPOS.  */
44037a66 TG |
2258 | |
static rtx
lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
	      int bitpos)
{
  /* Build the shifted value in wide-int arithmetic, then wrap it in a
     constant rtx of MODE.  */
  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
}
2265 | \f | |
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
   a BLKmode MEM.

   If REVERSE is true, the extraction is to be done in reverse order.  */
44037a66 TG |
2276 | |
static rtx
extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
			 bool reverse)
{
  unsigned int unit;
  unsigned int bitsdone = 0;
  rtx result = NULL_RTX;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* Extract UNIT-sized (or smaller) pieces until the whole field has been
     read, OR-ing each piece into place in RESULT.  */
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
	{
	  op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
	  op0_piece_mode = word_mode;
	  offset = 0;
	}

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_PER_UNIT.
	 OFFSET is in UNITs, and UNIT is in bits.  */
      part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
				      thissize, offset * unit + thispos,
				      0, 1, reverse);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsize - bitsdone, 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsdone - thissize, 0, 1);
	}

      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
2363 | \f | |
18b526e8 RS |
/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
   MODE, fill the upper bits with zeros.  Fail if the layout of either
   mode is unknown (as for CC modes) or if the extraction would involve
   unprofitable mode punning.  Return the value on success, otherwise
   return null.

   This is different from gen_lowpart* in these respects:

   - the returned value must always be considered an rvalue

   - when MODE is wider than SRC_MODE, the extraction involves
     a zero extension

   - when MODE is smaller than SRC_MODE, the extraction involves
     a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).

   In other words, this routine performs a computation, whereas the
   gen_lowpart* routines are conceptually lvalue or rvalue subreg
   operations.  */
2384 | ||
rtx
extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
{
  scalar_int_mode int_mode, src_int_mode;

  if (mode == src_mode)
    return src;

  if (CONSTANT_P (src))
    {
      /* simplify_gen_subreg can't be used here, as if simplify_subreg
	 fails, it will happily create (subreg (symbol_ref)) or similar
	 invalid SUBREGs.  */
      poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
      if (ret)
	return ret;

      if (GET_MODE (src) == VOIDmode
	  || !validate_subreg (mode, src_mode, src, byte))
	return NULL_RTX;

      /* Force the constant into a register so a plain SUBREG is valid.  */
      src = force_reg (GET_MODE (src), src);
      return gen_rtx_SUBREG (mode, src, byte);
    }

  /* CC modes have no defined bit layout; give up.  */
  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
    return NULL_RTX;

  if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
      && targetm.modes_tieable_p (mode, src_mode))
    {
      rtx x = gen_lowpart_common (mode, src);
      if (x)
	return x;
    }

  /* Otherwise go via integer modes of the same sizes: SRC_MODE ->
     SRC_INT_MODE -> INT_MODE -> MODE.  */
  if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
      || !int_mode_for_mode (mode).exists (&int_mode))
    return NULL_RTX;

  if (!targetm.modes_tieable_p (src_int_mode, src_mode))
    return NULL_RTX;
  if (!targetm.modes_tieable_p (int_mode, mode))
    return NULL_RTX;

  src = gen_lowpart (src_int_mode, src);
  if (!validate_subreg (int_mode, src_int_mode, src,
			subreg_lowpart_offset (int_mode, src_int_mode)))
    return NULL_RTX;

  src = convert_modes (int_mode, src_int_mode, src, true);
  src = gen_lowpart (mode, src);
  return src;
}
2440 | \f | |
44037a66 TG |
2441 | /* Add INC into TARGET. */ |
2442 | ||
2443 | void | |
502b8322 | 2444 | expand_inc (rtx target, rtx inc) |
44037a66 TG |
2445 | { |
2446 | rtx value = expand_binop (GET_MODE (target), add_optab, | |
2447 | target, inc, | |
2448 | target, 0, OPTAB_LIB_WIDEN); | |
2449 | if (value != target) | |
2450 | emit_move_insn (target, value); | |
2451 | } | |
2452 | ||
2453 | /* Subtract DEC from TARGET. */ | |
2454 | ||
2455 | void | |
502b8322 | 2456 | expand_dec (rtx target, rtx dec) |
44037a66 TG |
2457 | { |
2458 | rtx value = expand_binop (GET_MODE (target), sub_optab, | |
2459 | target, dec, | |
2460 | target, 0, OPTAB_LIB_WIDEN); | |
2461 | if (value != target) | |
2462 | emit_move_insn (target, value); | |
2463 | } | |
2464 | \f | |
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.
   If that cannot be done, abort the compilation unless MAY_FAIL is true,
   in which case 0 is returned.  */
44037a66 | 2473 | |
static rtx
expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
		rtx amount, rtx target, int unsignedp, bool may_fail = false)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  machine_mode op1_mode;
  scalar_mode scalar_mode = GET_MODE_INNER (mode);
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  if (SHIFT_COUNT_TRUNCATED)
    {
      /* The target only looks at the low-order bits of the count, so an
	 over-wide constant count can be reduced modulo the bitsize, and a
	 lowpart SUBREG count can be replaced by its full inner value.  */
      if (CONST_INT_P (op1)
	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
	op1 = gen_int_shift_amount (mode,
				    (unsigned HOST_WIDE_INT) INTVAL (op1)
				    % GET_MODE_BITSIZE (scalar_mode));
      else if (GET_CODE (op1) == SUBREG
	       && subreg_lowpart_p (op1)
	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
	op1 = SUBREG_REG (op1);
    }

  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
     prefer left rotation, if op1 is from bitsize / 2 + 1 to
     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
     amount instead.  */
  if (rotate
      && CONST_INT_P (op1)
      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
		   GET_MODE_BITSIZE (scalar_mode) - 1))
    {
      op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
					 - INTVAL (op1)));
      left = !left;
      code = left ? LROTATE_EXPR : RROTATE_EXPR;
    }

  /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
     Note that this is not the case for bigger values.  For instance a rotation
     of 0x01020304 by 16 bits gives 0x03040102 which is different from
     0x04030201 (bswapsi).  */
  if (rotate
      && CONST_INT_P (op1)
      && INTVAL (op1) == BITS_PER_UNIT
      && GET_MODE_SIZE (scalar_mode) == 2
      && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
    return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);

  if (op1 == const0_rtx)
    return shifted;

  /* Check whether its cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
	  > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;
      for (i = 0; i < INTVAL (op1); i++)
	{
	  /* Each addition of TEMP to itself doubles it, i.e. shifts
	     left by one.  */
	  temp = force_reg (mode, shifted);
	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
				  unsignedp, OPTAB_LIB_WIDEN);
	}
      return shifted;
    }

  /* Try progressively more expensive expansion strategies.  */
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
	methods = OPTAB_DIRECT;
      else if (attempt == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute
		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code below.  */

	      rtx subtarget = target == shifted ? 0 : target;
	      rtx new_amount, other_amount;
	      rtx temp1;

	      new_amount = op1;
	      if (op1 == const0_rtx)
		return shifted;
	      else if (CONST_INT_P (op1))
		other_amount = gen_int_shift_amount
		  (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
	      else
		{
		  /* For a variable count, compute (-N) & (C - 1) at
		     run time.  */
		  other_amount
		    = simplify_gen_unary (NEG, GET_MODE (op1),
					  op1, GET_MODE (op1));
		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
		  other_amount
		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
					   gen_int_mode (mask,
							 GET_MODE (op1)));
		}

	      shifted = force_reg (mode, shifted);

	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				     mode, shifted, new_amount, 0, 1);
	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				      mode, shifted, other_amount,
				      subtarget, 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? lrotate_optab : rrotate_optab,
			       shifted, op1, target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? lshift_optab : rshift_uns_optab,
			     shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */

	  temp = expand_binop (mode,
			       left ? lshift_optab : rshift_arith_optab,
			       shifted, op1, target, unsignedp, methods1);
	}

      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  gcc_assert (temp != NULL_RTX || may_fail);
  return temp;
}
eb6c3df1 RG |
2675 | |
2676 | /* Output a shift instruction for expression code CODE, | |
2677 | with SHIFTED being the rtx for the value to shift, | |
2678 | and AMOUNT the amount to shift by. | |
2679 | Store the result in the rtx TARGET, if that is convenient. | |
2680 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2681 | Return the rtx for where the value is. */ | |
2682 | ||
2683 | rtx | |
ef4bddc2 | 2684 | expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |
0c12fc9b | 2685 | poly_int64 amount, rtx target, int unsignedp) |
eb6c3df1 | 2686 | { |
abd3c800 RS |
2687 | return expand_shift_1 (code, mode, shifted, |
2688 | gen_int_shift_amount (mode, amount), | |
2689 | target, unsignedp); | |
86529a49 RG |
2690 | } |
2691 | ||
ea000c3f EB |
2692 | /* Likewise, but return 0 if that cannot be done. */ |
2693 | ||
2694 | static rtx | |
2695 | maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted, | |
2696 | int amount, rtx target, int unsignedp) | |
2697 | { | |
2698 | return expand_shift_1 (code, mode, | |
2699 | shifted, GEN_INT (amount), target, unsignedp, true); | |
2700 | } | |
2701 | ||
86529a49 RG |
2702 | /* Output a shift instruction for expression code CODE, |
2703 | with SHIFTED being the rtx for the value to shift, | |
2704 | and AMOUNT the tree for the amount to shift by. | |
2705 | Store the result in the rtx TARGET, if that is convenient. | |
2706 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2707 | Return the rtx for where the value is. */ | |
2708 | ||
2709 | rtx | |
ef4bddc2 | 2710 | expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, |
86529a49 RG |
2711 | tree amount, rtx target, int unsignedp) |
2712 | { | |
2713 | return expand_shift_1 (code, mode, | |
2714 | shifted, expand_normal (amount), target, unsignedp); | |
eb6c3df1 | 2715 | } |
86529a49 | 2716 | |
44037a66 | 2717 | \f |
41c64ac0 | 2718 | static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, |
ef4bddc2 | 2719 | const struct mult_cost *, machine_mode mode); |
ef4bddc2 | 2720 | static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, |
8efc8980 | 2721 | const struct algorithm *, enum mult_variant); |
502b8322 | 2722 | static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); |
095a2d76 RS |
2723 | static rtx extract_high_half (scalar_int_mode, rtx); |
2724 | static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int); | |
2725 | static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx, | |
8efc8980 | 2726 | int, int); |
44037a66 | 2727 | /* Compute and return the best algorithm for multiplying by T. |
7963ac37 RK |
2728 | The algorithm must cost less than cost_limit |
2729 | If retval.cost >= COST_LIMIT, no algorithm was found and all | |
41c64ac0 RS |
2730 | other field of the returned struct are undefined. |
2731 | MODE is the machine mode of the multiplication. */ | |
44037a66 | 2732 | |
819126a6 | 2733 | static void |
502b8322 | 2734 | synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |
ef4bddc2 | 2735 | const struct mult_cost *cost_limit, machine_mode mode) |
44037a66 | 2736 | { |
b2fb324c | 2737 | int m; |
52786026 | 2738 | struct algorithm *alg_in, *best_alg; |
26276705 RS |
2739 | struct mult_cost best_cost; |
2740 | struct mult_cost new_limit; | |
2741 | int op_cost, op_latency; | |
ef268d34 | 2742 | unsigned HOST_WIDE_INT orig_t = t; |
b2fb324c | 2743 | unsigned HOST_WIDE_INT q; |
84ddb681 | 2744 | int maxm, hash_index; |
7b13ee6b KH |
2745 | bool cache_hit = false; |
2746 | enum alg_code cache_alg = alg_zero; | |
f40751dd | 2747 | bool speed = optimize_insn_for_speed_p (); |
c7ad039d | 2748 | scalar_int_mode imode; |
5322d07e | 2749 | struct alg_hash_entry *entry_ptr; |
44037a66 | 2750 | |
7963ac37 RK |
2751 | /* Indicate that no algorithm is yet found. If no algorithm |
2752 | is found, this value will be returned and indicate failure. */ | |
26276705 | 2753 | alg_out->cost.cost = cost_limit->cost + 1; |
3ab0f290 | 2754 | alg_out->cost.latency = cost_limit->latency + 1; |
44037a66 | 2755 | |
26276705 RS |
2756 | if (cost_limit->cost < 0 |
2757 | || (cost_limit->cost == 0 && cost_limit->latency <= 0)) | |
819126a6 | 2758 | return; |
44037a66 | 2759 | |
84ddb681 | 2760 | /* Be prepared for vector modes. */ |
c7ad039d | 2761 | imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode)); |
84ddb681 RH |
2762 | |
2763 | maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); | |
2764 | ||
0792ab19 | 2765 | /* Restrict the bits of "t" to the multiplication's mode. */ |
84ddb681 | 2766 | t &= GET_MODE_MASK (imode); |
0792ab19 | 2767 | |
b385aeda RK |
2768 | /* t == 1 can be done in zero cost. */ |
2769 | if (t == 1) | |
b2fb324c | 2770 | { |
819126a6 | 2771 | alg_out->ops = 1; |
26276705 RS |
2772 | alg_out->cost.cost = 0; |
2773 | alg_out->cost.latency = 0; | |
819126a6 RK |
2774 | alg_out->op[0] = alg_m; |
2775 | return; | |
b2fb324c RK |
2776 | } |
2777 | ||
b385aeda RK |
2778 | /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |
2779 | fail now. */ | |
819126a6 | 2780 | if (t == 0) |
b385aeda | 2781 | { |
5322d07e | 2782 | if (MULT_COST_LESS (cost_limit, zero_cost (speed))) |
819126a6 | 2783 | return; |
b385aeda RK |
2784 | else |
2785 | { | |
819126a6 | 2786 | alg_out->ops = 1; |
5322d07e NF |
2787 | alg_out->cost.cost = zero_cost (speed); |
2788 | alg_out->cost.latency = zero_cost (speed); | |
819126a6 RK |
2789 | alg_out->op[0] = alg_zero; |
2790 | return; | |
b385aeda RK |
2791 | } |
2792 | } | |
2793 | ||
52786026 RK |
2794 | /* We'll be needing a couple extra algorithm structures now. */ |
2795 | ||
1b4572a8 KG |
2796 | alg_in = XALLOCA (struct algorithm); |
2797 | best_alg = XALLOCA (struct algorithm); | |
26276705 | 2798 | best_cost = *cost_limit; |
52786026 | 2799 | |
7b13ee6b | 2800 | /* Compute the hash index. */ |
f40751dd | 2801 | hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |
7b13ee6b KH |
2802 | |
2803 | /* See if we already know what to do for T. */ | |
5322d07e NF |
2804 | entry_ptr = alg_hash_entry_ptr (hash_index); |
2805 | if (entry_ptr->t == t | |
5322d07e NF |
2806 | && entry_ptr->mode == mode |
2807 | && entry_ptr->speed == speed | |
2808 | && entry_ptr->alg != alg_unknown) | |
7b13ee6b | 2809 | { |
5322d07e | 2810 | cache_alg = entry_ptr->alg; |
0178027c KH |
2811 | |
2812 | if (cache_alg == alg_impossible) | |
7b13ee6b | 2813 | { |
0178027c | 2814 | /* The cache tells us that it's impossible to synthesize |
5322d07e NF |
2815 | multiplication by T within entry_ptr->cost. */ |
2816 | if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) | |
0178027c KH |
2817 | /* COST_LIMIT is at least as restrictive as the one |
2818 | recorded in the hash table, in which case we have no | |
2819 | hope of synthesizing a multiplication. Just | |
2820 | return. */ | |
2821 | return; | |
2822 | ||
2823 | /* If we get here, COST_LIMIT is less restrictive than the | |
2824 | one recorded in the hash table, so we may be able to | |
2825 | synthesize a multiplication. Proceed as if we didn't | |
2826 | have the cache entry. */ | |
2827 | } | |
2828 | else | |
2829 | { | |
5322d07e | 2830 | if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) |
0178027c KH |
2831 | /* The cached algorithm shows that this multiplication |
2832 | requires more cost than COST_LIMIT. Just return. This | |
2833 | way, we don't clobber this cache entry with | |
2834 | alg_impossible but retain useful information. */ | |
2835 | return; | |
7b13ee6b | 2836 | |
0178027c KH |
2837 | cache_hit = true; |
2838 | ||
2839 | switch (cache_alg) | |
2840 | { | |
2841 | case alg_shift: | |
2842 | goto do_alg_shift; | |
7b13ee6b | 2843 | |
0178027c KH |
2844 | case alg_add_t_m2: |
2845 | case alg_sub_t_m2: | |
2846 | goto do_alg_addsub_t_m2; | |
7b13ee6b | 2847 | |
0178027c KH |
2848 | case alg_add_factor: |
2849 | case alg_sub_factor: | |
2850 | goto do_alg_addsub_factor; | |
7b13ee6b | 2851 | |
0178027c KH |
2852 | case alg_add_t2_m: |
2853 | goto do_alg_add_t2_m; | |
7b13ee6b | 2854 | |
0178027c KH |
2855 | case alg_sub_t2_m: |
2856 | goto do_alg_sub_t2_m; | |
2857 | ||
2858 | default: | |
2859 | gcc_unreachable (); | |
2860 | } | |
7b13ee6b KH |
2861 | } |
2862 | } | |
2863 | ||
b385aeda RK |
2864 | /* If we have a group of zero bits at the low-order part of T, try |
2865 | multiplying by the remaining bits and then doing a shift. */ | |
2866 | ||
b2fb324c | 2867 | if ((t & 1) == 0) |
44037a66 | 2868 | { |
7b13ee6b | 2869 | do_alg_shift: |
146ec50f | 2870 | m = ctz_or_zero (t); /* m = number of low zero bits */ |
0792ab19 | 2871 | if (m < maxm) |
44037a66 | 2872 | { |
02a65aef | 2873 | q = t >> m; |
15bad393 RS |
2874 | /* The function expand_shift will choose between a shift and |
2875 | a sequence of additions, so the observed cost is given as | |
5322d07e NF |
2876 | MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */ |
2877 | op_cost = m * add_cost (speed, mode); | |
2878 | if (shift_cost (speed, mode, m) < op_cost) | |
2879 | op_cost = shift_cost (speed, mode, m); | |
26276705 RS |
2880 | new_limit.cost = best_cost.cost - op_cost; |
2881 | new_limit.latency = best_cost.latency - op_cost; | |
2882 | synth_mult (alg_in, q, &new_limit, mode); | |
2883 | ||
2884 | alg_in->cost.cost += op_cost; | |
2885 | alg_in->cost.latency += op_cost; | |
2886 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
02a65aef | 2887 | { |
26276705 | 2888 | best_cost = alg_in->cost; |
076701b6 | 2889 | std::swap (alg_in, best_alg); |
02a65aef R |
2890 | best_alg->log[best_alg->ops] = m; |
2891 | best_alg->op[best_alg->ops] = alg_shift; | |
02a65aef | 2892 | } |
ddc2690a KH |
2893 | |
2894 | /* See if treating ORIG_T as a signed number yields a better | |
2895 | sequence. Try this sequence only for a negative ORIG_T | |
2896 | as it would be useless for a non-negative ORIG_T. */ | |
2897 | if ((HOST_WIDE_INT) orig_t < 0) | |
2898 | { | |
2899 | /* Shift ORIG_T as follows because a right shift of a | |
2900 | negative-valued signed type is implementation | |
2901 | defined. */ | |
2902 | q = ~(~orig_t >> m); | |
2903 | /* The function expand_shift will choose between a shift | |
2904 | and a sequence of additions, so the observed cost is | |
5322d07e NF |
2905 | given as MIN (m * add_cost(speed, mode), |
2906 | shift_cost(speed, mode, m)). */ | |
2907 | op_cost = m * add_cost (speed, mode); | |
2908 | if (shift_cost (speed, mode, m) < op_cost) | |
2909 | op_cost = shift_cost (speed, mode, m); | |
ddc2690a KH |
2910 | new_limit.cost = best_cost.cost - op_cost; |
2911 | new_limit.latency = best_cost.latency - op_cost; | |
2912 | synth_mult (alg_in, q, &new_limit, mode); | |
2913 | ||
2914 | alg_in->cost.cost += op_cost; | |
2915 | alg_in->cost.latency += op_cost; | |
2916 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
2917 | { | |
ddc2690a | 2918 | best_cost = alg_in->cost; |
076701b6 | 2919 | std::swap (alg_in, best_alg); |
ddc2690a KH |
2920 | best_alg->log[best_alg->ops] = m; |
2921 | best_alg->op[best_alg->ops] = alg_shift; | |
2922 | } | |
2923 | } | |
819126a6 | 2924 | } |
7b13ee6b KH |
2925 | if (cache_hit) |
2926 | goto done; | |
819126a6 RK |
2927 | } |
2928 | ||
2929 | /* If we have an odd number, add or subtract one. */ | |
2930 | if ((t & 1) != 0) | |
2931 | { | |
2932 | unsigned HOST_WIDE_INT w; | |
2933 | ||
7b13ee6b | 2934 | do_alg_addsub_t_m2: |
819126a6 RK |
2935 | for (w = 1; (w & t) != 0; w <<= 1) |
2936 | ; | |
31031edd | 2937 | /* If T was -1, then W will be zero after the loop. This is another |
c410d49e | 2938 | case where T ends with ...111. Handling this with (T + 1) and |
31031edd JL |
2939 | subtract 1 produces slightly better code and results in algorithm |
2940 | selection much faster than treating it like the ...0111 case | |
2941 | below. */ | |
2942 | if (w == 0 | |
2943 | || (w > 2 | |
2944 | /* Reject the case where t is 3. | |
2945 | Thus we prefer addition in that case. */ | |
2946 | && t != 3)) | |
819126a6 | 2947 | { |
c61928d0 | 2948 | /* T ends with ...111. Multiply by (T + 1) and subtract T. */ |
819126a6 | 2949 | |
5322d07e | 2950 | op_cost = add_cost (speed, mode); |
26276705 RS |
2951 | new_limit.cost = best_cost.cost - op_cost; |
2952 | new_limit.latency = best_cost.latency - op_cost; | |
2953 | synth_mult (alg_in, t + 1, &new_limit, mode); | |
b2fb324c | 2954 | |
26276705 RS |
2955 | alg_in->cost.cost += op_cost; |
2956 | alg_in->cost.latency += op_cost; | |
2957 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2958 | { |
26276705 | 2959 | best_cost = alg_in->cost; |
076701b6 | 2960 | std::swap (alg_in, best_alg); |
819126a6 RK |
2961 | best_alg->log[best_alg->ops] = 0; |
2962 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
44037a66 | 2963 | } |
44037a66 | 2964 | } |
819126a6 RK |
2965 | else |
2966 | { | |
c61928d0 | 2967 | /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */ |
44037a66 | 2968 | |
5322d07e | 2969 | op_cost = add_cost (speed, mode); |
26276705 RS |
2970 | new_limit.cost = best_cost.cost - op_cost; |
2971 | new_limit.latency = best_cost.latency - op_cost; | |
2972 | synth_mult (alg_in, t - 1, &new_limit, mode); | |
819126a6 | 2973 | |
26276705 RS |
2974 | alg_in->cost.cost += op_cost; |
2975 | alg_in->cost.latency += op_cost; | |
2976 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
819126a6 | 2977 | { |
26276705 | 2978 | best_cost = alg_in->cost; |
076701b6 | 2979 | std::swap (alg_in, best_alg); |
819126a6 RK |
2980 | best_alg->log[best_alg->ops] = 0; |
2981 | best_alg->op[best_alg->ops] = alg_add_t_m2; | |
819126a6 RK |
2982 | } |
2983 | } | |
ef268d34 KH |
2984 | |
2985 | /* We may be able to calculate a * -7, a * -15, a * -31, etc | |
2986 | quickly with a - a * n for some appropriate constant n. */ | |
2987 | m = exact_log2 (-orig_t + 1); | |
2988 | if (m >= 0 && m < maxm) | |
2989 | { | |
35430ca0 KT |
2990 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
2991 | /* If the target has a cheap shift-and-subtract insn use | |
2992 | that in preference to a shift insn followed by a sub insn. | |
2993 | Assume that the shift-and-sub is "atomic" with a latency | |
2994 | equal to it's cost, otherwise assume that on superscalar | |
2995 | hardware the shift may be executed concurrently with the | |
2996 | earlier steps in the algorithm. */ | |
2997 | if (shiftsub1_cost (speed, mode, m) <= op_cost) | |
2998 | { | |
2999 | op_cost = shiftsub1_cost (speed, mode, m); | |
3000 | op_latency = op_cost; | |
3001 | } | |
3002 | else | |
3003 | op_latency = add_cost (speed, mode); | |
3004 | ||
ef268d34 | 3005 | new_limit.cost = best_cost.cost - op_cost; |
35430ca0 | 3006 | new_limit.latency = best_cost.latency - op_latency; |
84ddb681 RH |
3007 | synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, |
3008 | &new_limit, mode); | |
ef268d34 KH |
3009 | |
3010 | alg_in->cost.cost += op_cost; | |
35430ca0 | 3011 | alg_in->cost.latency += op_latency; |
ef268d34 KH |
3012 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
3013 | { | |
ef268d34 | 3014 | best_cost = alg_in->cost; |
076701b6 | 3015 | std::swap (alg_in, best_alg); |
ef268d34 KH |
3016 | best_alg->log[best_alg->ops] = m; |
3017 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
3018 | } | |
3019 | } | |
3020 | ||
7b13ee6b KH |
3021 | if (cache_hit) |
3022 | goto done; | |
819126a6 | 3023 | } |
63610db9 | 3024 | |
44037a66 | 3025 | /* Look for factors of t of the form |
7963ac37 | 3026 | t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |
44037a66 | 3027 | If we find such a factor, we can multiply by t using an algorithm that |
7963ac37 | 3028 | multiplies by q, shift the result by m and add/subtract it to itself. |
44037a66 | 3029 | |
7963ac37 RK |
3030 | We search for large factors first and loop down, even if large factors |
3031 | are less probable than small; if we find a large factor we will find a | |
3032 | good sequence quickly, and therefore be able to prune (by decreasing | |
3033 | COST_LIMIT) the search. */ | |
3034 | ||
7b13ee6b | 3035 | do_alg_addsub_factor: |
7963ac37 | 3036 | for (m = floor_log2 (t - 1); m >= 2; m--) |
44037a66 | 3037 | { |
7963ac37 | 3038 | unsigned HOST_WIDE_INT d; |
44037a66 | 3039 | |
fecfbfa4 | 3040 | d = (HOST_WIDE_INT_1U << m) + 1; |
7b13ee6b KH |
3041 | if (t % d == 0 && t > d && m < maxm |
3042 | && (!cache_hit || cache_alg == alg_add_factor)) | |
44037a66 | 3043 | { |
5322d07e | 3044 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
35430ca0 KT |
3045 | if (shiftadd_cost (speed, mode, m) <= op_cost) |
3046 | op_cost = shiftadd_cost (speed, mode, m); | |
3047 | ||
3048 | op_latency = op_cost; | |
3049 | ||
26276705 RS |
3050 | |
3051 | new_limit.cost = best_cost.cost - op_cost; | |
3052 | new_limit.latency = best_cost.latency - op_latency; | |
3053 | synth_mult (alg_in, t / d, &new_limit, mode); | |
44037a66 | 3054 | |
26276705 RS |
3055 | alg_in->cost.cost += op_cost; |
3056 | alg_in->cost.latency += op_latency; | |
3057 | if (alg_in->cost.latency < op_cost) | |
3058 | alg_in->cost.latency = op_cost; | |
3059 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 3060 | { |
26276705 | 3061 | best_cost = alg_in->cost; |
076701b6 | 3062 | std::swap (alg_in, best_alg); |
b385aeda | 3063 | best_alg->log[best_alg->ops] = m; |
819126a6 | 3064 | best_alg->op[best_alg->ops] = alg_add_factor; |
44037a66 | 3065 | } |
c0b262c1 TG |
3066 | /* Other factors will have been taken care of in the recursion. */ |
3067 | break; | |
44037a66 TG |
3068 | } |
3069 | ||
fecfbfa4 | 3070 | d = (HOST_WIDE_INT_1U << m) - 1; |
7b13ee6b KH |
3071 | if (t % d == 0 && t > d && m < maxm |
3072 | && (!cache_hit || cache_alg == alg_sub_factor)) | |
44037a66 | 3073 | { |
5322d07e | 3074 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
35430ca0 KT |
3075 | if (shiftsub0_cost (speed, mode, m) <= op_cost) |
3076 | op_cost = shiftsub0_cost (speed, mode, m); | |
3077 | ||
3078 | op_latency = op_cost; | |
26276705 RS |
3079 | |
3080 | new_limit.cost = best_cost.cost - op_cost; | |
417c735c | 3081 | new_limit.latency = best_cost.latency - op_latency; |
26276705 | 3082 | synth_mult (alg_in, t / d, &new_limit, mode); |
44037a66 | 3083 | |
26276705 RS |
3084 | alg_in->cost.cost += op_cost; |
3085 | alg_in->cost.latency += op_latency; | |
3086 | if (alg_in->cost.latency < op_cost) | |
3087 | alg_in->cost.latency = op_cost; | |
3088 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 3089 | { |
26276705 | 3090 | best_cost = alg_in->cost; |
076701b6 | 3091 | std::swap (alg_in, best_alg); |
b385aeda | 3092 | best_alg->log[best_alg->ops] = m; |
819126a6 | 3093 | best_alg->op[best_alg->ops] = alg_sub_factor; |
44037a66 | 3094 | } |
c0b262c1 | 3095 | break; |
44037a66 TG |
3096 | } |
3097 | } | |
7b13ee6b KH |
3098 | if (cache_hit) |
3099 | goto done; | |
44037a66 | 3100 | |
7963ac37 RK |
3101 | /* Try shift-and-add (load effective address) instructions, |
3102 | i.e. do a*3, a*5, a*9. */ | |
3103 | if ((t & 1) != 0) | |
3104 | { | |
7b13ee6b | 3105 | do_alg_add_t2_m: |
7963ac37 | 3106 | q = t - 1; |
146ec50f JM |
3107 | m = ctz_hwi (q); |
3108 | if (q && m < maxm) | |
b385aeda | 3109 | { |
5322d07e | 3110 | op_cost = shiftadd_cost (speed, mode, m); |
26276705 RS |
3111 | new_limit.cost = best_cost.cost - op_cost; |
3112 | new_limit.latency = best_cost.latency - op_cost; | |
3113 | synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); | |
3114 | ||
3115 | alg_in->cost.cost += op_cost; | |
3116 | alg_in->cost.latency += op_cost; | |
3117 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb | 3118 | { |
26276705 | 3119 | best_cost = alg_in->cost; |
076701b6 | 3120 | std::swap (alg_in, best_alg); |
5eebe2eb | 3121 | best_alg->log[best_alg->ops] = m; |
819126a6 | 3122 | best_alg->op[best_alg->ops] = alg_add_t2_m; |
5eebe2eb | 3123 | } |
7963ac37 | 3124 | } |
7b13ee6b KH |
3125 | if (cache_hit) |
3126 | goto done; | |
44037a66 | 3127 | |
7b13ee6b | 3128 | do_alg_sub_t2_m: |
7963ac37 | 3129 | q = t + 1; |
146ec50f JM |
3130 | m = ctz_hwi (q); |
3131 | if (q && m < maxm) | |
b385aeda | 3132 | { |
5322d07e | 3133 | op_cost = shiftsub0_cost (speed, mode, m); |
26276705 RS |
3134 | new_limit.cost = best_cost.cost - op_cost; |
3135 | new_limit.latency = best_cost.latency - op_cost; | |
3136 | synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); | |
3137 | ||
3138 | alg_in->cost.cost += op_cost; | |
3139 | alg_in->cost.latency += op_cost; | |
3140 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb | 3141 | { |
26276705 | 3142 | best_cost = alg_in->cost; |
076701b6 | 3143 | std::swap (alg_in, best_alg); |
5eebe2eb | 3144 | best_alg->log[best_alg->ops] = m; |
819126a6 | 3145 | best_alg->op[best_alg->ops] = alg_sub_t2_m; |
5eebe2eb | 3146 | } |
7963ac37 | 3147 | } |
7b13ee6b KH |
3148 | if (cache_hit) |
3149 | goto done; | |
7963ac37 | 3150 | } |
44037a66 | 3151 | |
7b13ee6b | 3152 | done: |
3ab0f290 DJ |
3153 | /* If best_cost has not decreased, we have not found any algorithm. */ |
3154 | if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) | |
0178027c KH |
3155 | { |
3156 | /* We failed to find an algorithm. Record alg_impossible for | |
3157 | this case (that is, <T, MODE, COST_LIMIT>) so that next time | |
3158 | we are asked to find an algorithm for T within the same or | |
3159 | lower COST_LIMIT, we can immediately return to the | |
3160 | caller. */ | |
5322d07e NF |
3161 | entry_ptr->t = t; |
3162 | entry_ptr->mode = mode; | |
3163 | entry_ptr->speed = speed; | |
3164 | entry_ptr->alg = alg_impossible; | |
3165 | entry_ptr->cost = *cost_limit; | |
0178027c KH |
3166 | return; |
3167 | } | |
3ab0f290 | 3168 | |
7b13ee6b KH |
3169 | /* Cache the result. */ |
3170 | if (!cache_hit) | |
3171 | { | |
5322d07e NF |
3172 | entry_ptr->t = t; |
3173 | entry_ptr->mode = mode; | |
3174 | entry_ptr->speed = speed; | |
3175 | entry_ptr->alg = best_alg->op[best_alg->ops]; | |
3176 | entry_ptr->cost.cost = best_cost.cost; | |
3177 | entry_ptr->cost.latency = best_cost.latency; | |
7b13ee6b KH |
3178 | } |
3179 | ||
52786026 RK |
3180 | /* If we are getting a too long sequence for `struct algorithm' |
3181 | to record, make this search fail. */ | |
3182 | if (best_alg->ops == MAX_BITS_PER_WORD) | |
3183 | return; | |
3184 | ||
819126a6 RK |
3185 | /* Copy the algorithm from temporary space to the space at alg_out. |
3186 | We avoid using structure assignment because the majority of | |
3187 | best_alg is normally undefined, and this is a critical function. */ | |
3188 | alg_out->ops = best_alg->ops + 1; | |
26276705 | 3189 | alg_out->cost = best_cost; |
4e135bdd KG |
3190 | memcpy (alg_out->op, best_alg->op, |
3191 | alg_out->ops * sizeof *alg_out->op); | |
3192 | memcpy (alg_out->log, best_alg->log, | |
3193 | alg_out->ops * sizeof *alg_out->log); | |
44037a66 TG |
3194 | } |
3195 | \f | |
d1a6adeb | 3196 | /* Find the cheapest way of multiplying a value of mode MODE by VAL. |
8efc8980 RS |
3197 | Try three variations: |
3198 | ||
3199 | - a shift/add sequence based on VAL itself | |
3200 | - a shift/add sequence based on -VAL, followed by a negation | |
3201 | - a shift/add sequence based on VAL - 1, followed by an addition. | |
3202 | ||
f258e38b UW |
3203 | Return true if the cheapest of these cost less than MULT_COST, |
3204 | describing the algorithm in *ALG and final fixup in *VARIANT. */ | |
8efc8980 | 3205 | |
ec573d17 | 3206 | bool |
ef4bddc2 | 3207 | choose_mult_variant (machine_mode mode, HOST_WIDE_INT val, |
f258e38b UW |
3208 | struct algorithm *alg, enum mult_variant *variant, |
3209 | int mult_cost) | |
8efc8980 | 3210 | { |
8efc8980 | 3211 | struct algorithm alg2; |
26276705 RS |
3212 | struct mult_cost limit; |
3213 | int op_cost; | |
f40751dd | 3214 | bool speed = optimize_insn_for_speed_p (); |
8efc8980 | 3215 | |
18eaea7f RS |
3216 | /* Fail quickly for impossible bounds. */ |
3217 | if (mult_cost < 0) | |
3218 | return false; | |
3219 | ||
3220 | /* Ensure that mult_cost provides a reasonable upper bound. | |
3221 | Any constant multiplication can be performed with less | |
3222 | than 2 * bits additions. */ | |
5322d07e | 3223 | op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); |
18eaea7f RS |
3224 | if (mult_cost > op_cost) |
3225 | mult_cost = op_cost; | |
3226 | ||
8efc8980 | 3227 | *variant = basic_variant; |
26276705 RS |
3228 | limit.cost = mult_cost; |
3229 | limit.latency = mult_cost; | |
3230 | synth_mult (alg, val, &limit, mode); | |
8efc8980 RS |
3231 | |
3232 | /* This works only if the inverted value actually fits in an | |
3233 | `unsigned int' */ | |
84ddb681 | 3234 | if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) |
8efc8980 | 3235 | { |
c3284718 | 3236 | op_cost = neg_cost (speed, mode); |
26276705 RS |
3237 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |
3238 | { | |
3239 | limit.cost = alg->cost.cost - op_cost; | |
3240 | limit.latency = alg->cost.latency - op_cost; | |
3241 | } | |
3242 | else | |
3243 | { | |
3244 | limit.cost = mult_cost - op_cost; | |
3245 | limit.latency = mult_cost - op_cost; | |
3246 | } | |
3247 | ||
3248 | synth_mult (&alg2, -val, &limit, mode); | |
3249 | alg2.cost.cost += op_cost; | |
3250 | alg2.cost.latency += op_cost; | |
3251 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) | |
8efc8980 RS |
3252 | *alg = alg2, *variant = negate_variant; |
3253 | } | |
3254 | ||
3255 | /* This proves very useful for division-by-constant. */ | |
5322d07e | 3256 | op_cost = add_cost (speed, mode); |
26276705 RS |
3257 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |
3258 | { | |
3259 | limit.cost = alg->cost.cost - op_cost; | |
3260 | limit.latency = alg->cost.latency - op_cost; | |
3261 | } | |
3262 | else | |
3263 | { | |
3264 | limit.cost = mult_cost - op_cost; | |
3265 | limit.latency = mult_cost - op_cost; | |
3266 | } | |
3267 | ||
3268 | synth_mult (&alg2, val - 1, &limit, mode); | |
3269 | alg2.cost.cost += op_cost; | |
3270 | alg2.cost.latency += op_cost; | |
3271 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) | |
8efc8980 RS |
3272 | *alg = alg2, *variant = add_variant; |
3273 | ||
26276705 | 3274 | return MULT_COST_LESS (&alg->cost, mult_cost); |
8efc8980 RS |
3275 | } |
3276 | ||
3277 | /* A subroutine of expand_mult, used for constant multiplications. | |
3278 | Multiply OP0 by VAL in mode MODE, storing the result in TARGET if | |
3279 | convenient. Use the shift/add sequence described by ALG and apply | |
3280 | the final fixup specified by VARIANT. */ | |
3281 | ||
3282 | static rtx | |
ef4bddc2 | 3283 | expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val, |
8efc8980 RS |
3284 | rtx target, const struct algorithm *alg, |
3285 | enum mult_variant variant) | |
3286 | { | |
1b13411a | 3287 | unsigned HOST_WIDE_INT val_so_far; |
f3f6fb16 DM |
3288 | rtx_insn *insn; |
3289 | rtx accum, tem; | |
8efc8980 | 3290 | int opno; |
ef4bddc2 | 3291 | machine_mode nmode; |
8efc8980 | 3292 | |
d448860e JH |
3293 | /* Avoid referencing memory over and over and invalid sharing |
3294 | on SUBREGs. */ | |
3295 | op0 = force_reg (mode, op0); | |
8efc8980 RS |
3296 | |
3297 | /* ACCUM starts out either as OP0 or as a zero, depending on | |
3298 | the first operation. */ | |
3299 | ||
3300 | if (alg->op[0] == alg_zero) | |
3301 | { | |
84ddb681 | 3302 | accum = copy_to_mode_reg (mode, CONST0_RTX (mode)); |
8efc8980 RS |
3303 | val_so_far = 0; |
3304 | } | |
3305 | else if (alg->op[0] == alg_m) | |
3306 | { | |
3307 | accum = copy_to_mode_reg (mode, op0); | |
3308 | val_so_far = 1; | |
3309 | } | |
3310 | else | |
5b0264cb | 3311 | gcc_unreachable (); |
8efc8980 RS |
3312 | |
3313 | for (opno = 1; opno < alg->ops; opno++) | |
3314 | { | |
3315 | int log = alg->log[opno]; | |
7c27e184 | 3316 | rtx shift_subtarget = optimize ? 0 : accum; |
8efc8980 RS |
3317 | rtx add_target |
3318 | = (opno == alg->ops - 1 && target != 0 && variant != add_variant | |
7c27e184 | 3319 | && !optimize) |
8efc8980 | 3320 | ? target : 0; |
7c27e184 | 3321 | rtx accum_target = optimize ? 0 : accum; |
7543f918 | 3322 | rtx accum_inner; |
8efc8980 RS |
3323 | |
3324 | switch (alg->op[opno]) | |
3325 | { | |
3326 | case alg_shift: | |
eb6c3df1 | 3327 | tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); |
4caa21a1 UB |
3328 | /* REG_EQUAL note will be attached to the following insn. */ |
3329 | emit_move_insn (accum, tem); | |
8efc8980 RS |
3330 | val_so_far <<= log; |
3331 | break; | |
3332 | ||
3333 | case alg_add_t_m2: | |
eb6c3df1 | 3334 | tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); |
8efc8980 RS |
3335 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
3336 | add_target ? add_target : accum_target); | |
1b13411a | 3337 | val_so_far += HOST_WIDE_INT_1U << log; |
8efc8980 RS |
3338 | break; |
3339 | ||
3340 | case alg_sub_t_m2: | |
eb6c3df1 | 3341 | tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); |
8efc8980 RS |
3342 | accum = force_operand (gen_rtx_MINUS (mode, accum, tem), |
3343 | add_target ? add_target : accum_target); | |
1b13411a | 3344 | val_so_far -= HOST_WIDE_INT_1U << log; |
8efc8980 RS |
3345 | break; |
3346 | ||
3347 | case alg_add_t2_m: | |
3348 | accum = expand_shift (LSHIFT_EXPR, mode, accum, | |
eb6c3df1 | 3349 | log, shift_subtarget, 0); |
8efc8980 RS |
3350 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), |
3351 | add_target ? add_target : accum_target); | |
3352 | val_so_far = (val_so_far << log) + 1; | |
3353 | break; | |
3354 | ||
3355 | case alg_sub_t2_m: | |
3356 | accum = expand_shift (LSHIFT_EXPR, mode, accum, | |
eb6c3df1 | 3357 | log, shift_subtarget, 0); |
8efc8980 RS |
3358 | accum = force_operand (gen_rtx_MINUS (mode, accum, op0), |
3359 | add_target ? add_target : accum_target); | |
3360 | val_so_far = (val_so_far << log) - 1; | |
3361 | break; | |
3362 | ||
3363 | case alg_add_factor: | |
eb6c3df1 | 3364 | tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); |
8efc8980 RS |
3365 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
3366 | add_target ? add_target : accum_target); | |
3367 | val_so_far += val_so_far << log; | |
3368 | break; | |
3369 | ||
3370 | case alg_sub_factor: | |
eb6c3df1 | 3371 | tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); |
8efc8980 | 3372 | accum = force_operand (gen_rtx_MINUS (mode, tem, accum), |
7c27e184 PB |
3373 | (add_target |
3374 | ? add_target : (optimize ? 0 : tem))); | |
8efc8980 RS |
3375 | val_so_far = (val_so_far << log) - val_so_far; |
3376 | break; | |
3377 | ||
3378 | default: | |
5b0264cb | 3379 | gcc_unreachable (); |
8efc8980 RS |
3380 | } |
3381 | ||
84ddb681 | 3382 | if (SCALAR_INT_MODE_P (mode)) |
8efc8980 | 3383 | { |
84ddb681 RH |
3384 | /* Write a REG_EQUAL note on the last insn so that we can cse |
3385 | multiplication sequences. Note that if ACCUM is a SUBREG, | |
3386 | we've set the inner register and must properly indicate that. */ | |
8e7dfe71 JJ |
3387 | tem = op0, nmode = mode; |
3388 | accum_inner = accum; | |
3389 | if (GET_CODE (accum) == SUBREG) | |
84ddb681 RH |
3390 | { |
3391 | accum_inner = SUBREG_REG (accum); | |
3392 | nmode = GET_MODE (accum_inner); | |
3393 | tem = gen_lowpart (nmode, op0); | |
3394 | } | |
8efc8980 | 3395 | |
87654f18 JJ |
3396 | /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG. |
3397 | In that case, only the low bits of accum would be guaranteed to | |
3398 | be equal to the content of the REG_EQUAL note, the upper bits | |
3399 | can be anything. */ | |
3400 | if (!paradoxical_subreg_p (tem)) | |
3401 | { | |
3402 | insn = get_last_insn (); | |
3403 | wide_int wval_so_far | |
3404 | = wi::uhwi (val_so_far, | |
3405 | GET_MODE_PRECISION (as_a <scalar_mode> (nmode))); | |
3406 | rtx c = immed_wide_int_const (wval_so_far, nmode); | |
3407 | set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c), | |
3408 | accum_inner); | |
3409 | } | |
84ddb681 | 3410 | } |
8efc8980 RS |
3411 | } |
3412 | ||
3413 | if (variant == negate_variant) | |
3414 | { | |
3415 | val_so_far = -val_so_far; | |
3416 | accum = expand_unop (mode, neg_optab, accum, target, 0); | |
3417 | } | |
3418 | else if (variant == add_variant) | |
3419 | { | |
3420 | val_so_far = val_so_far + 1; | |
3421 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); | |
3422 | } | |
3423 | ||
42eb30b5 ZW |
3424 | /* Compare only the bits of val and val_so_far that are significant |
3425 | in the result mode, to avoid sign-/zero-extension confusion. */ | |
84ddb681 | 3426 | nmode = GET_MODE_INNER (mode); |
84ddb681 RH |
3427 | val &= GET_MODE_MASK (nmode); |
3428 | val_so_far &= GET_MODE_MASK (nmode); | |
1b13411a | 3429 | gcc_assert (val == (HOST_WIDE_INT) val_so_far); |
8efc8980 RS |
3430 | |
3431 | return accum; | |
3432 | } | |
3433 | ||
44037a66 TG |
3434 | /* Perform a multiplication and return an rtx for the result. |
3435 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); | |
3436 | TARGET is a suggestion for where to store the result (an rtx). | |
3437 | ||
3438 | We check specially for a constant integer as OP1. | |
3439 | If you want this check for OP0 as well, then before calling | |
3440 | you should swap the two operands if OP0 would be constant. */ | |
3441 | ||
3442 | rtx | |
ef4bddc2 | 3443 | expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target, |
0b99f253 | 3444 | int unsignedp, bool no_libcall) |
44037a66 | 3445 | { |
8efc8980 RS |
3446 | enum mult_variant variant; |
3447 | struct algorithm algorithm; | |
84ddb681 | 3448 | rtx scalar_op1; |
65dc9350 | 3449 | int max_cost; |
f40751dd | 3450 | bool speed = optimize_insn_for_speed_p (); |
84ddb681 | 3451 | bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp; |
44037a66 | 3452 | |
84ddb681 | 3453 | if (CONSTANT_P (op0)) |
4c278134 | 3454 | std::swap (op0, op1); |
84ddb681 RH |
3455 | |
3456 | /* For vectors, there are several simplifications that can be made if | |
3457 | all elements of the vector constant are identical. */ | |
92695fbb | 3458 | scalar_op1 = unwrap_const_vec_duplicate (op1); |
84ddb681 RH |
3459 | |
3460 | if (INTEGRAL_MODE_P (mode)) | |
3461 | { | |
3462 | rtx fake_reg; | |
caf62455 JDA |
3463 | HOST_WIDE_INT coeff; |
3464 | bool is_neg; | |
84ddb681 RH |
3465 | int mode_bitsize; |
3466 | ||
3467 | if (op1 == CONST0_RTX (mode)) | |
3468 | return op1; | |
3469 | if (op1 == CONST1_RTX (mode)) | |
3470 | return op0; | |
3471 | if (op1 == CONSTM1_RTX (mode)) | |
3472 | return expand_unop (mode, do_trapv ? negv_optab : neg_optab, | |
3473 | op0, target, 0); | |
3474 | ||
3475 | if (do_trapv) | |
3476 | goto skip_synth; | |
3477 | ||
66b3ed5f JJ |
3478 | /* If mode is integer vector mode, check if the backend supports |
3479 | vector lshift (by scalar or vector) at all. If not, we can't use | |
3480 | synthetized multiply. */ | |
3481 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT | |
3482 | && optab_handler (vashl_optab, mode) == CODE_FOR_nothing | |
3483 | && optab_handler (ashl_optab, mode) == CODE_FOR_nothing) | |
3484 | goto skip_synth; | |
3485 | ||
84ddb681 RH |
3486 | /* These are the operations that are potentially turned into |
3487 | a sequence of shifts and additions. */ | |
3488 | mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); | |
65dc9350 RS |
3489 | |
3490 | /* synth_mult does an `unsigned int' multiply. As long as the mode is | |
3491 | less than or equal in size to `unsigned int' this doesn't matter. | |
3492 | If the mode is larger than `unsigned int', then synth_mult works | |
3493 | only if the constant value exactly fits in an `unsigned int' without | |
3494 | any truncation. This means that multiplying by negative values does | |
3495 | not work; results are off by 2^32 on a 32 bit machine. */ | |
84ddb681 | 3496 | if (CONST_INT_P (scalar_op1)) |
58b42e19 | 3497 | { |
84ddb681 RH |
3498 | coeff = INTVAL (scalar_op1); |
3499 | is_neg = coeff < 0; | |
65dc9350 | 3500 | } |
807e902e KZ |
3501 | #if TARGET_SUPPORTS_WIDE_INT |
3502 | else if (CONST_WIDE_INT_P (scalar_op1)) | |
3503 | #else | |
48175537 | 3504 | else if (CONST_DOUBLE_AS_INT_P (scalar_op1)) |
807e902e | 3505 | #endif |
65dc9350 | 3506 | { |
f079167a | 3507 | int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode)); |
807e902e KZ |
3508 | /* Perfect power of 2 (other than 1, which is handled above). */ |
3509 | if (shift > 0) | |
3510 | return expand_shift (LSHIFT_EXPR, mode, op0, | |
3511 | shift, target, unsignedp); | |
caf62455 JDA |
3512 | else |
3513 | goto skip_synth; | |
65dc9350 | 3514 | } |
84ddb681 RH |
3515 | else |
3516 | goto skip_synth; | |
b8698a0f | 3517 | |
65dc9350 RS |
3518 | /* We used to test optimize here, on the grounds that it's better to |
3519 | produce a smaller program when -O is not used. But this causes | |
3520 | such a terrible slowdown sometimes that it seems better to always | |
3521 | use synth_mult. */ | |
65dc9350 | 3522 | |
84ddb681 | 3523 | /* Special case powers of two. */ |
be63b77d JJ |
3524 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff) |
3525 | && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)) | |
84ddb681 RH |
3526 | return expand_shift (LSHIFT_EXPR, mode, op0, |
3527 | floor_log2 (coeff), target, unsignedp); | |
3528 | ||
3529 | fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); | |
3530 | ||
3531 | /* Attempt to handle multiplication of DImode values by negative | |
3532 | coefficients, by performing the multiplication by a positive | |
3533 | multiplier and then inverting the result. */ | |
84ddb681 RH |
3534 | if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) |
3535 | { | |
3536 | /* Its safe to use -coeff even for INT_MIN, as the | |
3537 | result is interpreted as an unsigned coefficient. | |
3538 | Exclude cost of op0 from max_cost to match the cost | |
5e839bc8 | 3539 | calculation of the synth_mult. */ |
be63b77d | 3540 | coeff = -(unsigned HOST_WIDE_INT) coeff; |
e548c9df AM |
3541 | max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), |
3542 | mode, speed) | |
c3284718 | 3543 | - neg_cost (speed, mode)); |
be63b77d JJ |
3544 | if (max_cost <= 0) |
3545 | goto skip_synth; | |
3546 | ||
3547 | /* Special case powers of two. */ | |
3548 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) | |
3549 | { | |
3550 | rtx temp = expand_shift (LSHIFT_EXPR, mode, op0, | |
3551 | floor_log2 (coeff), target, unsignedp); | |
3552 | return expand_unop (mode, neg_optab, temp, target, 0); | |
3553 | } | |
3554 | ||
3555 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, | |
3556 | max_cost)) | |
84ddb681 | 3557 | { |
be63b77d | 3558 | rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX, |
84ddb681 RH |
3559 | &algorithm, variant); |
3560 | return expand_unop (mode, neg_optab, temp, target, 0); | |
3561 | } | |
b216b86b | 3562 | goto skip_synth; |
58b42e19 | 3563 | } |
44037a66 | 3564 | |
84ddb681 RH |
3565 | /* Exclude cost of op0 from max_cost to match the cost |
3566 | calculation of the synth_mult. */ | |
e548c9df | 3567 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed); |
84ddb681 RH |
3568 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) |
3569 | return expand_mult_const (mode, op0, coeff, target, | |
3570 | &algorithm, variant); | |
f2593a66 | 3571 | } |
84ddb681 | 3572 | skip_synth: |
f2593a66 RS |
3573 | |
3574 | /* Expand x*2.0 as x+x. */ | |
34a72c33 RS |
3575 | if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1) |
3576 | && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2)) | |
f2593a66 | 3577 | { |
34a72c33 RS |
3578 | op0 = force_reg (GET_MODE (op0), op0); |
3579 | return expand_binop (mode, add_optab, op0, op0, | |
0b99f253 JJ |
3580 | target, unsignedp, |
3581 | no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN); | |
f2593a66 RS |
3582 | } |
3583 | ||
819126a6 RK |
3584 | /* This used to use umul_optab if unsigned, but for non-widening multiply |
3585 | there is no difference between signed and unsigned. */ | |
84ddb681 | 3586 | op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab, |
0b99f253 JJ |
3587 | op0, op1, target, unsignedp, |
3588 | no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN); | |
3589 | gcc_assert (op0 || no_libcall); | |
44037a66 TG |
3590 | return op0; |
3591 | } | |
5b58b39b | 3592 | |
6dd8f4bb BS |
3593 | /* Return a cost estimate for multiplying a register by the given |
3594 | COEFFicient in the given MODE and SPEED. */ | |
3595 | ||
3596 | int | |
ef4bddc2 | 3597 | mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) |
6dd8f4bb BS |
3598 | { |
3599 | int max_cost; | |
3600 | struct algorithm algorithm; | |
3601 | enum mult_variant variant; | |
3602 | ||
3603 | rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); | |
e548c9df AM |
3604 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), |
3605 | mode, speed); | |
6dd8f4bb BS |
3606 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) |
3607 | return algorithm.cost.cost; | |
3608 | else | |
3609 | return max_cost; | |
3610 | } | |
3611 | ||
5b58b39b BS |
/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
   or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds.  */

rtx
expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
		      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  /* Only attempt the synthetic (shift/add) path when OP1 is a constant
     that survives conversion to MODE as a CONST_INT whose value is
     representable for the host-wide-int cost machinery.  */
  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
				this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
	  || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      /* Multiplying by zero yields zero without emitting any code.  */
      if (coeff == 0)
	return CONST0_RTX (mode);

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_shift (LSHIFT_EXPR, mode, op0,
			       floor_log2 (coeff), target, unsignedp);
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = mul_widen_cost (speed, mode);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
			       max_cost))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_mult_const (mode, op0, coeff, target,
				    &algorithm, variant);
	}
    }
  /* Fall back to the widening multiply instruction itself.  */
  return expand_binop (mode, this_optab, op0, op1, target,
		       unsignedp, OPTAB_LIB_WIDEN);
}
44037a66 | 3667 | \f |
55c2d311 TG |
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
{
  int lgup, post_shift;
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(N + lgup)/d */
  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      /* extract_uhwi (X, 1, HOST_BITS_PER_WIDE_INT) is the low
	 HOST_BITS_PER_WIDE_INT bits of X >> 1, i.e. the halved
	 candidate multipliers.  */
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
						       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
						       HOST_BITS_PER_WIDE_INT);
      /* Once the halved bounds coincide there is no smaller valid
	 multiplier; stop shrinking the post shift.  */
      if (ml_lo >= mh_lo)
	break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      /* Return the N'th (carry) bit of the chosen multiplier.  */
      return mhigh.to_uhwi () > mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
    }
}
3739 | ||
3740 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is | |
3741 | congruent to 1 (mod 2**N). */ | |
3742 | ||
3743 | static unsigned HOST_WIDE_INT | |
502b8322 | 3744 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |
55c2d311 | 3745 | { |
0f41302f | 3746 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |
55c2d311 TG |
3747 | |
3748 | /* The algorithm notes that the choice y = x satisfies | |
3749 | x*y == 1 mod 2^3, since x is assumed odd. | |
3750 | Each iteration doubles the number of bits of significance in y. */ | |
3751 | ||
3752 | unsigned HOST_WIDE_INT mask; | |
3753 | unsigned HOST_WIDE_INT y = x; | |
3754 | int nbit = 3; | |
3755 | ||
3756 | mask = (n == HOST_BITS_PER_WIDE_INT | |
dd4786fe | 3757 | ? HOST_WIDE_INT_M1U |
fecfbfa4 | 3758 | : (HOST_WIDE_INT_1U << n) - 1); |
55c2d311 TG |
3759 | |
3760 | while (nbit < n) | |
3761 | { | |
3762 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ | |
3763 | nbit *= 2; | |
3764 | } | |
3765 | return y; | |
3766 | } | |
3767 | ||
/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
   become signed.

   The result is put in TARGET if that is convenient.

   MODE is the mode of operation.  */

rtx
expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
			     rtx op1, rtx target, int unsignedp)
{
  rtx tem;
  /* Converting signed->unsigned adds the correction terms; the
     reverse direction subtracts them.  */
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;

  /* tem = (op0 >> (bitsize-1)) & op1, i.e. op1 where op0 is negative.  */
  tem = expand_shift (RSHIFT_EXPR, mode, op0,
		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op1, NULL_RTX);
  adj_operand
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
		     adj_operand);

  /* Symmetric correction: op0 where op1 is negative.  */
  tem = expand_shift (RSHIFT_EXPR, mode, op1,
		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op0, NULL_RTX);
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
			  target);

  return target;
}
3800 | ||
00f07b86 | 3801 | /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ |
55c2d311 | 3802 | |
8efc8980 | 3803 | static rtx |
095a2d76 | 3804 | extract_high_half (scalar_int_mode mode, rtx op) |
8efc8980 | 3805 | { |
8efc8980 RS |
3806 | if (mode == word_mode) |
3807 | return gen_highpart (mode, op); | |
71af73bb | 3808 | |
90970acd | 3809 | scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); |
15ed7b52 | 3810 | |
8efc8980 | 3811 | op = expand_shift (RSHIFT_EXPR, wider_mode, op, |
eb6c3df1 | 3812 | GET_MODE_BITSIZE (mode), 0, 1); |
8efc8980 RS |
3813 | return convert_modes (mode, wider_mode, op, 0); |
3814 | } | |
55c2d311 | 3815 | |
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.  */

static rtx
expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();

  size = GET_MODE_BITSIZE (mode);

  /* Each strategy below is only tried when its estimated cost fits
     within MAX_COST; the first one that succeeds wins.  */

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  /* No strategy fit within MAX_COST.  */
  return 0;
}
71af73bb | 3920 | |
0d282692 RS |
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation cannot be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  /* The synthetic path always needs the final high-half-extracting
     shift; charge for it up front.  */
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      /* Widen, multiply by the constant, then keep the high half.  */
      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
0b55e932 RS |
3991 | |
3992 | ||
/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */

static rtx
expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* signmask is -1 when op0 is negative, else 0.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      /* ((op0 ^ signmask) - signmask) & masklow, undone again
		 afterwards: compute |op0| mod d, then restore sign.  */
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      /* Bias op0 by (d - 1) only when it is negative, mask,
		 then remove the bias again.  */
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* If the masked value is non-negative it is already the answer;
     otherwise fix it up below.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);

  /* Sign-extend the low logd bits: OR in all bits above them, then
     re-add the 1 subtracted above.  */
  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
39cab019 RS |
4088 | |
4089 | /* Expand signed division of OP0 by a power of two D in mode MODE. | |
4090 | This routine is only called for positive values of D. */ | |
4091 | ||
4092 | static rtx | |
095a2d76 | 4093 | expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d) |
39cab019 | 4094 | { |
f3f6fb16 DM |
4095 | rtx temp; |
4096 | rtx_code_label *label; | |
39cab019 RS |
4097 | int logd; |
4098 | ||
4099 | logd = floor_log2 (d); | |
39cab019 | 4100 | |
3a4fd356 JH |
4101 | if (d == 2 |
4102 | && BRANCH_COST (optimize_insn_for_speed_p (), | |
4103 | false) >= 1) | |
39cab019 RS |
4104 | { |
4105 | temp = gen_reg_rtx (mode); | |
4106 | temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1); | |
259c3965 JL |
4107 | if (temp != NULL_RTX) |
4108 | { | |
4109 | temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, | |
4110 | 0, OPTAB_LIB_WIDEN); | |
4111 | return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); | |
4112 | } | |
39cab019 RS |
4113 | } |
4114 | ||
692e0312 TS |
4115 | if (HAVE_conditional_move |
4116 | && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2) | |
fdded401 RS |
4117 | { |
4118 | rtx temp2; | |
4119 | ||
4120 | start_sequence (); | |
4121 | temp2 = copy_to_mode_reg (mode, op0); | |
2f1cd2eb | 4122 | temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode), |
fdded401 RS |
4123 | NULL_RTX, 0, OPTAB_LIB_WIDEN); |
4124 | temp = force_reg (mode, temp); | |
4125 | ||
4126 | /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */ | |
4127 | temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx, | |
4128 | mode, temp, temp2, mode, 0); | |
4129 | if (temp2) | |
4130 | { | |
f3f6fb16 | 4131 | rtx_insn *seq = get_insns (); |
fdded401 RS |
4132 | end_sequence (); |
4133 | emit_insn (seq); | |
eb6c3df1 | 4134 | return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0); |
fdded401 RS |
4135 | } |
4136 | end_sequence (); | |
4137 | } | |
fdded401 | 4138 | |
3a4fd356 JH |
4139 | if (BRANCH_COST (optimize_insn_for_speed_p (), |
4140 | false) >= 2) | |
39cab019 RS |
4141 | { |
4142 | int ushift = GET_MODE_BITSIZE (mode) - logd; | |
4143 | ||
4144 | temp = gen_reg_rtx (mode); | |
4145 | temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); | |
259c3965 JL |
4146 | if (temp != NULL_RTX) |
4147 | { | |
4148 | if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD | |
4149 | || shift_cost (optimize_insn_for_speed_p (), mode, ushift) | |
4150 | > COSTS_N_INSNS (1)) | |
4151 | temp = expand_binop (mode, and_optab, temp, | |
4152 | gen_int_mode (d - 1, mode), | |
4153 | NULL_RTX, 0, OPTAB_LIB_WIDEN); | |
4154 | else | |
4155 | temp = expand_shift (RSHIFT_EXPR, mode, temp, | |
4156 | ushift, NULL_RTX, 1); | |
4157 | temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, | |
4158 | 0, OPTAB_LIB_WIDEN); | |
4159 | return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); | |
4160 | } | |
39cab019 RS |
4161 | } |
4162 | ||
4163 | label = gen_label_rtx (); | |
4164 | temp = copy_to_mode_reg (mode, op0); | |
4165 | do_cmp_and_jump (temp, const0_rtx, GE, mode, label); | |
2f1cd2eb | 4166 | expand_inc (temp, gen_int_mode (d - 1, mode)); |
39cab019 | 4167 | emit_label (label); |
eb6c3df1 | 4168 | return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); |
39cab019 | 4169 | } |
55c2d311 | 4170 | \f |
44037a66 TG |
4171 | /* Emit the code to divide OP0 by OP1, putting the result in TARGET |
4172 | if that is convenient, and returning where the result is. | |
4173 | You may request either the quotient or the remainder as the result; | |
4174 | specify REM_FLAG nonzero to get the remainder. | |
4175 | ||
4176 | CODE is the expression code for which kind of division this is; | |
4177 | it controls how rounding is done. MODE is the machine mode to use. | |
4178 | UNSIGNEDP nonzero means do unsigned division. */ | |
4179 | ||
4180 | /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI | |
4181 | and then correct it by or'ing in missing high bits | |
4182 | if result of ANDI is nonzero. | |
4183 | For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. | |
4184 | This could optimize to a bfexts instruction. | |
4185 | But C doesn't use these operations, so their optimizations are | |
4186 | left for later. */ | |
5353610b R |
4187 | /* ??? For modulo, we don't actually need the highpart of the first product, |
4188 | the low part will do nicely. And for small divisors, the second multiply | |
4189 | can also be a low-part only multiply or even be completely left out. | |
4190 | E.g. to calculate the remainder of a division by 3 with a 32 bit | |
4191 | multiply, multiply with 0x55555556 and extract the upper two bits; | |
4192 | the result is exact for inputs up to 0x1fffffff. | |
4193 | The input range can be reduced by using cross-sum rules. | |
4194 | For odd divisors >= 3, the following table gives right shift counts | |
09da1532 | 4195 | so that if a number is shifted by an integer multiple of the given |
5353610b R |
4196 | amount, the remainder stays the same: |
4197 | 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, | |
4198 | 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, | |
4199 | 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, | |
4200 | 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, | |
4201 | 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 | |
4202 | ||
4203 | Cross-sum rules for even numbers can be derived by leaving as many bits | |
4204 | to the right alone as the divisor has zeros to the right. | |
4205 | E.g. if x is an unsigned 32 bit number: | |
4206 | (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 | |
4207 | */ | |
44037a66 TG |
4208 | |
4209 | rtx | |
ef4bddc2 | 4210 | expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, |
e34153b0 JJ |
4211 | rtx op0, rtx op1, rtx target, int unsignedp, |
4212 | enum optab_methods methods) | |
44037a66 | 4213 | { |
ef4bddc2 | 4214 | machine_mode compute_mode; |
b3694847 | 4215 | rtx tquotient; |
55c2d311 | 4216 | rtx quotient = 0, remainder = 0; |
f3f6fb16 | 4217 | rtx_insn *last; |
f3f6fb16 | 4218 | rtx_insn *insn; |
44037a66 | 4219 | optab optab1, optab2; |
1c4a429a | 4220 | int op1_is_constant, op1_is_pow2 = 0; |
71af73bb | 4221 | int max_cost, extra_cost; |
9ec36da5 | 4222 | static HOST_WIDE_INT last_div_const = 0; |
f40751dd | 4223 | bool speed = optimize_insn_for_speed_p (); |
55c2d311 | 4224 | |
481683e1 | 4225 | op1_is_constant = CONST_INT_P (op1); |
1c4a429a JH |
4226 | if (op1_is_constant) |
4227 | { | |
76a7314d JJ |
4228 | wide_int ext_op1 = rtx_mode_t (op1, mode); |
4229 | op1_is_pow2 = (wi::popcount (ext_op1) == 1 | |
4230 | || (! unsignedp | |
4231 | && wi::popcount (wi::neg (ext_op1)) == 1)); | |
1c4a429a | 4232 | } |
55c2d311 TG |
4233 | |
4234 | /* | |
4235 | This is the structure of expand_divmod: | |
4236 | ||
4237 | First comes code to fix up the operands so we can perform the operations | |
4238 | correctly and efficiently. | |
4239 | ||
4240 | Second comes a switch statement with code specific for each rounding mode. | |
4241 | For some special operands this code emits all RTL for the desired | |
69f61901 | 4242 | operation, for other cases, it generates only a quotient and stores it in |
55c2d311 TG |
4243 | QUOTIENT. The case for trunc division/remainder might leave quotient = 0, |
4244 | to indicate that it has not done anything. | |
4245 | ||
69f61901 RK |
4246 | Last comes code that finishes the operation. If QUOTIENT is set and |
4247 | REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If | |
4248 | QUOTIENT is not set, it is computed using trunc rounding. | |
44037a66 | 4249 | |
55c2d311 TG |
4250 | We try to generate special code for division and remainder when OP1 is a |
4251 | constant. If |OP1| = 2**n we can use shifts and some other fast | |
4252 | operations. For other values of OP1, we compute a carefully selected | |
4253 | fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 | |
4254 | by m. | |
4255 | ||
4256 | In all cases but EXACT_DIV_EXPR, this multiplication requires the upper | |
4257 | half of the product. Different strategies for generating the product are | |
00f07b86 | 4258 | implemented in expmed_mult_highpart. |
55c2d311 TG |
4259 | |
4260 | If what we actually want is the remainder, we generate that by another | |
4261 | by-constant multiplication and a subtraction. */ | |
4262 | ||
4263 | /* We shouldn't be called with OP1 == const1_rtx, but some of the | |
3d32ffd1 TW |
4264 | code below will malfunction if we are, so check here and handle |
4265 | the special case if so. */ | |
4266 | if (op1 == const1_rtx) | |
4267 | return rem_flag ? const0_rtx : op0; | |
4268 | ||
91ce572a CC |
4269 | /* When dividing by -1, we could get an overflow. |
4270 | negv_optab can handle overflows. */ | |
4271 | if (! unsignedp && op1 == constm1_rtx) | |
4272 | { | |
4273 | if (rem_flag) | |
0fb7aeda | 4274 | return const0_rtx; |
c3284718 | 4275 | return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT |
0fb7aeda | 4276 | ? negv_optab : neg_optab, op0, target, 0); |
91ce572a CC |
4277 | } |
4278 | ||
bc1c7e93 RK |
4279 | if (target |
4280 | /* Don't use the function value register as a target | |
4281 | since we have to read it as well as write it, | |
4282 | and function-inlining gets confused by this. */ | |
4283 | && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) | |
4284 | /* Don't clobber an operand while doing a multi-step calculation. */ | |
515dfc7a | 4285 | || ((rem_flag || op1_is_constant) |
bc1c7e93 | 4286 | && (reg_mentioned_p (target, op0) |
3c0cb5de | 4287 | || (MEM_P (op0) && MEM_P (target)))) |
bc1c7e93 | 4288 | || reg_mentioned_p (target, op1) |
3c0cb5de | 4289 | || (MEM_P (op1) && MEM_P (target)))) |
44037a66 TG |
4290 | target = 0; |
4291 | ||
44037a66 TG |
4292 | /* Get the mode in which to perform this computation. Normally it will |
4293 | be MODE, but sometimes we can't do the desired operation in MODE. | |
4294 | If so, pick a wider mode in which we can do the operation. Convert | |
4295 | to that mode at the start to avoid repeated conversions. | |
4296 | ||
4297 | First see what operations we need. These depend on the expression | |
4298 | we are evaluating. (We assume that divxx3 insns exist under the | |
4299 | same conditions that modxx3 insns and that these insns don't normally | |
4300 | fail. If these assumptions are not correct, we may generate less | |
4301 | efficient code in some cases.) | |
4302 | ||
4303 | Then see if we find a mode in which we can open-code that operation | |
4304 | (either a division, modulus, or shift). Finally, check for the smallest | |
4305 | mode for which we can do the operation with a library call. */ | |
4306 | ||
55c2d311 | 4307 | /* We might want to refine this now that we have division-by-constant |
00f07b86 | 4308 | optimization. Since expmed_mult_highpart tries so many variants, it is |
55c2d311 TG |
4309 | not straightforward to generalize this. Maybe we should make an array |
4310 | of possible modes in init_expmed? Save this for GCC 2.7. */ | |
4311 | ||
76a7314d | 4312 | optab1 = (op1_is_pow2 |
556a56ac | 4313 | ? (unsignedp ? lshr_optab : ashr_optab) |
44037a66 | 4314 | : (unsignedp ? udiv_optab : sdiv_optab)); |
76a7314d | 4315 | optab2 = (op1_is_pow2 ? optab1 |
556a56ac | 4316 | : (unsignedp ? udivmod_optab : sdivmod_optab)); |
44037a66 | 4317 | |
e34153b0 JJ |
4318 | if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN) |
4319 | { | |
4320 | FOR_EACH_MODE_FROM (compute_mode, mode) | |
4321 | if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing | |
4322 | || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) | |
44037a66 TG |
4323 | break; |
4324 | ||
e34153b0 JJ |
4325 | if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN) |
4326 | FOR_EACH_MODE_FROM (compute_mode, mode) | |
4327 | if (optab_libfunc (optab1, compute_mode) | |
4328 | || optab_libfunc (optab2, compute_mode)) | |
4329 | break; | |
4330 | } | |
4331 | else | |
4332 | compute_mode = mode; | |
4333 | ||
535a42b1 NS |
4334 | /* If we still couldn't find a mode, use MODE, but expand_binop will |
4335 | probably die. */ | |
44037a66 TG |
4336 | if (compute_mode == VOIDmode) |
4337 | compute_mode = mode; | |
4338 | ||
55c2d311 TG |
4339 | if (target && GET_MODE (target) == compute_mode) |
4340 | tquotient = target; | |
4341 | else | |
4342 | tquotient = gen_reg_rtx (compute_mode); | |
2c414fba | 4343 | |
55c2d311 TG |
4344 | #if 0 |
4345 | /* It should be possible to restrict the precision to GET_MODE_BITSIZE | |
71af73bb TG |
4346 | (mode), and thereby get better code when OP1 is a constant. Do that |
4347 | later. It will require going over all usages of SIZE below. */ | |
55c2d311 TG |
4348 | size = GET_MODE_BITSIZE (mode); |
4349 | #endif | |
bc1c7e93 | 4350 | |
9ec36da5 JL |
4351 | /* Only deduct something for a REM if the last divide done was |
4352 | for a different constant. Then set the constant of the last | |
4353 | divide. */ | |
5322d07e NF |
4354 | max_cost = (unsignedp |
4355 | ? udiv_cost (speed, compute_mode) | |
4356 | : sdiv_cost (speed, compute_mode)); | |
a28b2ac6 RS |
4357 | if (rem_flag && ! (last_div_const != 0 && op1_is_constant |
4358 | && INTVAL (op1) == last_div_const)) | |
5322d07e NF |
4359 | max_cost -= (mul_cost (speed, compute_mode) |
4360 | + add_cost (speed, compute_mode)); | |
9ec36da5 JL |
4361 | |
4362 | last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; | |
71af73bb | 4363 | |
55c2d311 | 4364 | /* Now convert to the best mode to use. */ |
44037a66 TG |
4365 | if (compute_mode != mode) |
4366 | { | |
55c2d311 | 4367 | op0 = convert_modes (compute_mode, mode, op0, unsignedp); |
81722fa9 | 4368 | op1 = convert_modes (compute_mode, mode, op1, unsignedp); |
e13a25d5 | 4369 | |
e9a25f70 JL |
4370 | /* convert_modes may have placed op1 into a register, so we |
4371 | must recompute the following. */ | |
481683e1 | 4372 | op1_is_constant = CONST_INT_P (op1); |
76a7314d JJ |
4373 | if (op1_is_constant) |
4374 | { | |
4375 | wide_int ext_op1 = rtx_mode_t (op1, compute_mode); | |
4376 | op1_is_pow2 = (wi::popcount (ext_op1) == 1 | |
4377 | || (! unsignedp | |
4378 | && wi::popcount (wi::neg (ext_op1)) == 1)); | |
4379 | } | |
4380 | else | |
4381 | op1_is_pow2 = 0; | |
44037a66 TG |
4382 | } |
4383 | ||
55c2d311 | 4384 | /* If one of the operands is a volatile MEM, copy it into a register. */ |
c2a47e48 | 4385 | |
3c0cb5de | 4386 | if (MEM_P (op0) && MEM_VOLATILE_P (op0)) |
55c2d311 | 4387 | op0 = force_reg (compute_mode, op0); |
3c0cb5de | 4388 | if (MEM_P (op1) && MEM_VOLATILE_P (op1)) |
c2a47e48 RK |
4389 | op1 = force_reg (compute_mode, op1); |
4390 | ||
ab0b6581 TG |
4391 | /* If we need the remainder or if OP1 is constant, we need to |
4392 | put OP0 in a register in case it has any queued subexpressions. */ | |
4393 | if (rem_flag || op1_is_constant) | |
4394 | op0 = force_reg (compute_mode, op0); | |
bc1c7e93 | 4395 | |
55c2d311 | 4396 | last = get_last_insn (); |
44037a66 | 4397 | |
9faa82d8 | 4398 | /* Promote floor rounding to trunc rounding for unsigned operations. */ |
55c2d311 | 4399 | if (unsignedp) |
44037a66 | 4400 | { |
55c2d311 TG |
4401 | if (code == FLOOR_DIV_EXPR) |
4402 | code = TRUNC_DIV_EXPR; | |
4403 | if (code == FLOOR_MOD_EXPR) | |
4404 | code = TRUNC_MOD_EXPR; | |
db7cafb0 JL |
4405 | if (code == EXACT_DIV_EXPR && op1_is_pow2) |
4406 | code = TRUNC_DIV_EXPR; | |
55c2d311 | 4407 | } |
bc1c7e93 | 4408 | |
55c2d311 TG |
4409 | if (op1 != const0_rtx) |
4410 | switch (code) | |
4411 | { | |
4412 | case TRUNC_MOD_EXPR: | |
4413 | case TRUNC_DIV_EXPR: | |
34f016ed | 4414 | if (op1_is_constant) |
55c2d311 | 4415 | { |
c7ad039d RS |
4416 | scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); |
4417 | int size = GET_MODE_BITSIZE (int_mode); | |
d8f1376c | 4418 | if (unsignedp) |
55c2d311 | 4419 | { |
079c527f | 4420 | unsigned HOST_WIDE_INT mh, ml; |
55c2d311 TG |
4421 | int pre_shift, post_shift; |
4422 | int dummy; | |
c7ad039d | 4423 | wide_int wd = rtx_mode_t (op1, int_mode); |
76a7314d | 4424 | unsigned HOST_WIDE_INT d = wd.to_uhwi (); |
55c2d311 | 4425 | |
76a7314d | 4426 | if (wi::popcount (wd) == 1) |
55c2d311 TG |
4427 | { |
4428 | pre_shift = floor_log2 (d); | |
4429 | if (rem_flag) | |
4430 | { | |
2f1cd2eb | 4431 | unsigned HOST_WIDE_INT mask |
fecfbfa4 | 4432 | = (HOST_WIDE_INT_1U << pre_shift) - 1; |
db3cf6fb | 4433 | remainder |
c7ad039d RS |
4434 | = expand_binop (int_mode, and_optab, op0, |
4435 | gen_int_mode (mask, int_mode), | |
e34153b0 | 4436 | remainder, 1, methods); |
55c2d311 | 4437 | if (remainder) |
c8dbc8ca | 4438 | return gen_lowpart (mode, remainder); |
55c2d311 | 4439 | } |
c7ad039d | 4440 | quotient = expand_shift (RSHIFT_EXPR, int_mode, op0, |
eb6c3df1 | 4441 | pre_shift, tquotient, 1); |
55c2d311 | 4442 | } |
34f016ed | 4443 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 | 4444 | { |
fecfbfa4 | 4445 | if (d >= (HOST_WIDE_INT_1U << (size - 1))) |
55c2d311 | 4446 | { |
dc1d6150 TG |
4447 | /* Most significant bit of divisor is set; emit an scc |
4448 | insn. */ | |
b45f0e58 | 4449 | quotient = emit_store_flag_force (tquotient, GEU, op0, op1, |
c7ad039d | 4450 | int_mode, 1, 1); |
55c2d311 TG |
4451 | } |
4452 | else | |
4453 | { | |
dc1d6150 TG |
4454 | /* Find a suitable multiplier and right shift count |
4455 | instead of multiplying with D. */ | |
4456 | ||
4457 | mh = choose_multiplier (d, size, size, | |
4458 | &ml, &post_shift, &dummy); | |
4459 | ||
4460 | /* If the suggested multiplier is more than SIZE bits, | |
4461 | we can do better for even divisors, using an | |
4462 | initial right shift. */ | |
4463 | if (mh != 0 && (d & 1) == 0) | |
4464 | { | |
146ec50f | 4465 | pre_shift = ctz_or_zero (d); |
dc1d6150 TG |
4466 | mh = choose_multiplier (d >> pre_shift, size, |
4467 | size - pre_shift, | |
4468 | &ml, &post_shift, &dummy); | |
5b0264cb | 4469 | gcc_assert (!mh); |
dc1d6150 TG |
4470 | } |
4471 | else | |
4472 | pre_shift = 0; | |
4473 | ||
4474 | if (mh != 0) | |
4475 | { | |
4476 | rtx t1, t2, t3, t4; | |
4477 | ||
02a65aef R |
4478 | if (post_shift - 1 >= BITS_PER_WORD) |
4479 | goto fail1; | |
4480 | ||
965703ed | 4481 | extra_cost |
c7ad039d RS |
4482 | = (shift_cost (speed, int_mode, post_shift - 1) |
4483 | + shift_cost (speed, int_mode, 1) | |
4484 | + 2 * add_cost (speed, int_mode)); | |
2f1cd2eb | 4485 | t1 = expmed_mult_highpart |
c7ad039d | 4486 | (int_mode, op0, gen_int_mode (ml, int_mode), |
2f1cd2eb | 4487 | NULL_RTX, 1, max_cost - extra_cost); |
dc1d6150 TG |
4488 | if (t1 == 0) |
4489 | goto fail1; | |
c7ad039d | 4490 | t2 = force_operand (gen_rtx_MINUS (int_mode, |
38a448ca | 4491 | op0, t1), |
dc1d6150 | 4492 | NULL_RTX); |
c7ad039d | 4493 | t3 = expand_shift (RSHIFT_EXPR, int_mode, |
eb6c3df1 | 4494 | t2, 1, NULL_RTX, 1); |
c7ad039d | 4495 | t4 = force_operand (gen_rtx_PLUS (int_mode, |
38a448ca | 4496 | t1, t3), |
dc1d6150 | 4497 | NULL_RTX); |
4a90aeeb | 4498 | quotient = expand_shift |
c7ad039d | 4499 | (RSHIFT_EXPR, int_mode, t4, |
eb6c3df1 | 4500 | post_shift - 1, tquotient, 1); |
dc1d6150 TG |
4501 | } |
4502 | else | |
4503 | { | |
4504 | rtx t1, t2; | |
4505 | ||
02a65aef R |
4506 | if (pre_shift >= BITS_PER_WORD |
4507 | || post_shift >= BITS_PER_WORD) | |
4508 | goto fail1; | |
4509 | ||
4a90aeeb | 4510 | t1 = expand_shift |
c7ad039d | 4511 | (RSHIFT_EXPR, int_mode, op0, |
eb6c3df1 | 4512 | pre_shift, NULL_RTX, 1); |
965703ed | 4513 | extra_cost |
c7ad039d RS |
4514 | = (shift_cost (speed, int_mode, pre_shift) |
4515 | + shift_cost (speed, int_mode, post_shift)); | |
2f1cd2eb | 4516 | t2 = expmed_mult_highpart |
c7ad039d RS |
4517 | (int_mode, t1, |
4518 | gen_int_mode (ml, int_mode), | |
2f1cd2eb | 4519 | NULL_RTX, 1, max_cost - extra_cost); |
dc1d6150 TG |
4520 | if (t2 == 0) |
4521 | goto fail1; | |
4a90aeeb | 4522 | quotient = expand_shift |
c7ad039d | 4523 | (RSHIFT_EXPR, int_mode, t2, |
eb6c3df1 | 4524 | post_shift, tquotient, 1); |
dc1d6150 | 4525 | } |
55c2d311 TG |
4526 | } |
4527 | } | |
34f016ed TG |
4528 | else /* Too wide mode to use tricky code */ |
4529 | break; | |
55c2d311 TG |
4530 | |
4531 | insn = get_last_insn (); | |
7543f918 JR |
4532 | if (insn != last) |
4533 | set_dst_reg_note (insn, REG_EQUAL, | |
c7ad039d | 4534 | gen_rtx_UDIV (int_mode, op0, op1), |
7543f918 | 4535 | quotient); |
55c2d311 TG |
4536 | } |
4537 | else /* TRUNC_DIV, signed */ | |
4538 | { | |
4539 | unsigned HOST_WIDE_INT ml; | |
4540 | int lgup, post_shift; | |
e71c0aa7 | 4541 | rtx mlr; |
55c2d311 | 4542 | HOST_WIDE_INT d = INTVAL (op1); |
e4c9f3c2 ILT |
4543 | unsigned HOST_WIDE_INT abs_d; |
4544 | ||
ebac3c02 JJ |
4545 | /* Not prepared to handle division/remainder by |
4546 | 0xffffffffffffffff8000000000000000 etc. */ | |
4547 | if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT) | |
4548 | break; | |
4549 | ||
093253be ILT |
4550 | /* Since d might be INT_MIN, we have to cast to |
4551 | unsigned HOST_WIDE_INT before negating to avoid | |
4552 | undefined signed overflow. */ | |
6d9c91e9 ILT |
4553 | abs_d = (d >= 0 |
4554 | ? (unsigned HOST_WIDE_INT) d | |
4555 | : - (unsigned HOST_WIDE_INT) d); | |
55c2d311 TG |
4556 | |
4557 | /* n rem d = n rem -d */ | |
4558 | if (rem_flag && d < 0) | |
4559 | { | |
4560 | d = abs_d; | |
c7ad039d | 4561 | op1 = gen_int_mode (abs_d, int_mode); |
55c2d311 TG |
4562 | } |
4563 | ||
4564 | if (d == 1) | |
4565 | quotient = op0; | |
4566 | else if (d == -1) | |
c7ad039d | 4567 | quotient = expand_unop (int_mode, neg_optab, op0, |
55c2d311 | 4568 | tquotient, 0); |
76a7314d | 4569 | else if (size <= HOST_BITS_PER_WIDE_INT |
fecfbfa4 | 4570 | && abs_d == HOST_WIDE_INT_1U << (size - 1)) |
f737b132 RK |
4571 | { |
4572 | /* This case is not handled correctly below. */ | |
4573 | quotient = emit_store_flag (tquotient, EQ, op0, op1, | |
c7ad039d | 4574 | int_mode, 1, 1); |
f737b132 RK |
4575 | if (quotient == 0) |
4576 | goto fail1; | |
4577 | } | |
55c2d311 | 4578 | else if (EXACT_POWER_OF_2_OR_ZERO_P (d) |
76a7314d | 4579 | && (size <= HOST_BITS_PER_WIDE_INT || d >= 0) |
5322d07e | 4580 | && (rem_flag |
c7ad039d RS |
4581 | ? smod_pow2_cheap (speed, int_mode) |
4582 | : sdiv_pow2_cheap (speed, int_mode)) | |
0b55e932 RS |
4583 | /* We assume that cheap metric is true if the |
4584 | optab has an expander for this mode. */ | |
166cdb08 JH |
4585 | && ((optab_handler ((rem_flag ? smod_optab |
4586 | : sdiv_optab), | |
c7ad039d | 4587 | int_mode) |
a8c7e72d | 4588 | != CODE_FOR_nothing) |
c7ad039d | 4589 | || (optab_handler (sdivmod_optab, int_mode) |
947131ba | 4590 | != CODE_FOR_nothing))) |
55c2d311 | 4591 | ; |
ebac3c02 | 4592 | else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) |
55c2d311 | 4593 | { |
0b55e932 RS |
4594 | if (rem_flag) |
4595 | { | |
c7ad039d | 4596 | remainder = expand_smod_pow2 (int_mode, op0, d); |
0b55e932 RS |
4597 | if (remainder) |
4598 | return gen_lowpart (mode, remainder); | |
4599 | } | |
3d520aaf | 4600 | |
c7ad039d RS |
4601 | if (sdiv_pow2_cheap (speed, int_mode) |
4602 | && ((optab_handler (sdiv_optab, int_mode) | |
3d520aaf | 4603 | != CODE_FOR_nothing) |
c7ad039d | 4604 | || (optab_handler (sdivmod_optab, int_mode) |
3d520aaf DE |
4605 | != CODE_FOR_nothing))) |
4606 | quotient = expand_divmod (0, TRUNC_DIV_EXPR, | |
c7ad039d | 4607 | int_mode, op0, |
3d520aaf | 4608 | gen_int_mode (abs_d, |
c7ad039d | 4609 | int_mode), |
3d520aaf DE |
4610 | NULL_RTX, 0); |
4611 | else | |
c7ad039d | 4612 | quotient = expand_sdiv_pow2 (int_mode, op0, abs_d); |
55c2d311 | 4613 | |
0b55e932 RS |
4614 | /* We have computed OP0 / abs(OP1). If OP1 is negative, |
4615 | negate the quotient. */ | |
55c2d311 TG |
4616 | if (d < 0) |
4617 | { | |
4618 | insn = get_last_insn (); | |
4e430df8 | 4619 | if (insn != last |
fecfbfa4 | 4620 | && abs_d < (HOST_WIDE_INT_1U |
c8e7fe58 | 4621 | << (HOST_BITS_PER_WIDE_INT - 1))) |
7543f918 | 4622 | set_dst_reg_note (insn, REG_EQUAL, |
c7ad039d | 4623 | gen_rtx_DIV (int_mode, op0, |
6d26322f JR |
4624 | gen_int_mode |
4625 | (abs_d, | |
c7ad039d | 4626 | int_mode)), |
7543f918 | 4627 | quotient); |
55c2d311 | 4628 | |
c7ad039d | 4629 | quotient = expand_unop (int_mode, neg_optab, |
55c2d311 TG |
4630 | quotient, quotient, 0); |
4631 | } | |
4632 | } | |
34f016ed | 4633 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 TG |
4634 | { |
4635 | choose_multiplier (abs_d, size, size - 1, | |
079c527f | 4636 | &ml, &post_shift, &lgup); |
fecfbfa4 | 4637 | if (ml < HOST_WIDE_INT_1U << (size - 1)) |
55c2d311 TG |
4638 | { |
4639 | rtx t1, t2, t3; | |
4640 | ||
02a65aef R |
4641 | if (post_shift >= BITS_PER_WORD |
4642 | || size - 1 >= BITS_PER_WORD) | |
4643 | goto fail1; | |
4644 | ||
c7ad039d RS |
4645 | extra_cost = (shift_cost (speed, int_mode, post_shift) |
4646 | + shift_cost (speed, int_mode, size - 1) | |
4647 | + add_cost (speed, int_mode)); | |
2f1cd2eb | 4648 | t1 = expmed_mult_highpart |
c7ad039d | 4649 | (int_mode, op0, gen_int_mode (ml, int_mode), |
2f1cd2eb | 4650 | NULL_RTX, 0, max_cost - extra_cost); |
55c2d311 TG |
4651 | if (t1 == 0) |
4652 | goto fail1; | |
4a90aeeb | 4653 | t2 = expand_shift |
c7ad039d | 4654 | (RSHIFT_EXPR, int_mode, t1, |
eb6c3df1 | 4655 | post_shift, NULL_RTX, 0); |
4a90aeeb | 4656 | t3 = expand_shift |
c7ad039d | 4657 | (RSHIFT_EXPR, int_mode, op0, |
eb6c3df1 | 4658 | size - 1, NULL_RTX, 0); |
55c2d311 | 4659 | if (d < 0) |
c5c76735 | 4660 | quotient |
c7ad039d | 4661 | = force_operand (gen_rtx_MINUS (int_mode, t3, t2), |
c5c76735 | 4662 | tquotient); |
55c2d311 | 4663 | else |
c5c76735 | 4664 | quotient |
c7ad039d | 4665 | = force_operand (gen_rtx_MINUS (int_mode, t2, t3), |
c5c76735 | 4666 | tquotient); |
55c2d311 TG |
4667 | } |
4668 | else | |
4669 | { | |
4670 | rtx t1, t2, t3, t4; | |
4671 | ||
02a65aef R |
4672 | if (post_shift >= BITS_PER_WORD |
4673 | || size - 1 >= BITS_PER_WORD) | |
4674 | goto fail1; | |
4675 | ||
dd4786fe | 4676 | ml |= HOST_WIDE_INT_M1U << (size - 1); |
c7ad039d RS |
4677 | mlr = gen_int_mode (ml, int_mode); |
4678 | extra_cost = (shift_cost (speed, int_mode, post_shift) | |
4679 | + shift_cost (speed, int_mode, size - 1) | |
4680 | + 2 * add_cost (speed, int_mode)); | |
4681 | t1 = expmed_mult_highpart (int_mode, op0, mlr, | |
71af73bb TG |
4682 | NULL_RTX, 0, |
4683 | max_cost - extra_cost); | |
55c2d311 TG |
4684 | if (t1 == 0) |
4685 | goto fail1; | |
c7ad039d | 4686 | t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0), |
55c2d311 | 4687 | NULL_RTX); |
4a90aeeb | 4688 | t3 = expand_shift |
c7ad039d | 4689 | (RSHIFT_EXPR, int_mode, t2, |
eb6c3df1 | 4690 | post_shift, NULL_RTX, 0); |
4a90aeeb | 4691 | t4 = expand_shift |
c7ad039d | 4692 | (RSHIFT_EXPR, int_mode, op0, |
eb6c3df1 | 4693 | size - 1, NULL_RTX, 0); |
55c2d311 | 4694 | if (d < 0) |
c5c76735 | 4695 | quotient |
c7ad039d | 4696 | = force_operand (gen_rtx_MINUS (int_mode, t4, t3), |
c5c76735 | 4697 | tquotient); |
55c2d311 | 4698 | else |
c5c76735 | 4699 | quotient |
c7ad039d | 4700 | = force_operand (gen_rtx_MINUS (int_mode, t3, t4), |
c5c76735 | 4701 | tquotient); |
55c2d311 TG |
4702 | } |
4703 | } | |
34f016ed TG |
4704 | else /* Too wide mode to use tricky code */ |
4705 | break; | |
55c2d311 | 4706 | |
4e430df8 | 4707 | insn = get_last_insn (); |
7543f918 JR |
4708 | if (insn != last) |
4709 | set_dst_reg_note (insn, REG_EQUAL, | |
c7ad039d | 4710 | gen_rtx_DIV (int_mode, op0, op1), |
7543f918 | 4711 | quotient); |
55c2d311 TG |
4712 | } |
4713 | break; | |
4714 | } | |
4715 | fail1: | |
4716 | delete_insns_since (last); | |
4717 | break; | |
44037a66 | 4718 | |
55c2d311 TG |
4719 | case FLOOR_DIV_EXPR: |
4720 | case FLOOR_MOD_EXPR: | |
4721 | /* We will come here only for signed operations. */ | |
c7ad039d | 4722 | if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode)) |
55c2d311 | 4723 | { |
c7ad039d RS |
4724 | scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); |
4725 | int size = GET_MODE_BITSIZE (int_mode); | |
079c527f | 4726 | unsigned HOST_WIDE_INT mh, ml; |
55c2d311 TG |
4727 | int pre_shift, lgup, post_shift; |
4728 | HOST_WIDE_INT d = INTVAL (op1); | |
4729 | ||
4730 | if (d > 0) | |
4731 | { | |
4732 | /* We could just as easily deal with negative constants here, | |
4733 | but it does not seem worth the trouble for GCC 2.6. */ | |
4734 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4735 | { | |
4736 | pre_shift = floor_log2 (d); | |
4737 | if (rem_flag) | |
4738 | { | |
2f1cd2eb | 4739 | unsigned HOST_WIDE_INT mask |
fecfbfa4 | 4740 | = (HOST_WIDE_INT_1U << pre_shift) - 1; |
2f1cd2eb | 4741 | remainder = expand_binop |
c7ad039d RS |
4742 | (int_mode, and_optab, op0, |
4743 | gen_int_mode (mask, int_mode), | |
e34153b0 | 4744 | remainder, 0, methods); |
55c2d311 | 4745 | if (remainder) |
c8dbc8ca | 4746 | return gen_lowpart (mode, remainder); |
55c2d311 | 4747 | } |
4a90aeeb | 4748 | quotient = expand_shift |
c7ad039d | 4749 | (RSHIFT_EXPR, int_mode, op0, |
eb6c3df1 | 4750 | pre_shift, tquotient, 0); |
55c2d311 TG |
4751 | } |
4752 | else | |
4753 | { | |
4754 | rtx t1, t2, t3, t4; | |
4755 | ||
4756 | mh = choose_multiplier (d, size, size - 1, | |
4757 | &ml, &post_shift, &lgup); | |
5b0264cb | 4758 | gcc_assert (!mh); |
55c2d311 | 4759 | |
02a65aef R |
4760 | if (post_shift < BITS_PER_WORD |
4761 | && size - 1 < BITS_PER_WORD) | |
55c2d311 | 4762 | { |
4a90aeeb | 4763 | t1 = expand_shift |
c7ad039d | 4764 | (RSHIFT_EXPR, int_mode, op0, |
eb6c3df1 | 4765 | size - 1, NULL_RTX, 0); |
c7ad039d | 4766 | t2 = expand_binop (int_mode, xor_optab, op0, t1, |
02a65aef | 4767 | NULL_RTX, 0, OPTAB_WIDEN); |
c7ad039d RS |
4768 | extra_cost = (shift_cost (speed, int_mode, post_shift) |
4769 | + shift_cost (speed, int_mode, size - 1) | |
4770 | + 2 * add_cost (speed, int_mode)); | |
2f1cd2eb | 4771 | t3 = expmed_mult_highpart |
c7ad039d | 4772 | (int_mode, t2, gen_int_mode (ml, int_mode), |
2f1cd2eb | 4773 | NULL_RTX, 1, max_cost - extra_cost); |
02a65aef R |
4774 | if (t3 != 0) |
4775 | { | |
4a90aeeb | 4776 | t4 = expand_shift |
c7ad039d | 4777 | (RSHIFT_EXPR, int_mode, t3, |
eb6c3df1 | 4778 | post_shift, NULL_RTX, 1); |
c7ad039d | 4779 | quotient = expand_binop (int_mode, xor_optab, |
02a65aef R |
4780 | t4, t1, tquotient, 0, |
4781 | OPTAB_WIDEN); | |
4782 | } | |
55c2d311 TG |
4783 | } |
4784 | } | |
4785 | } | |
4786 | else | |
4787 | { | |
4788 | rtx nsign, t1, t2, t3, t4; | |
c7ad039d | 4789 | t1 = force_operand (gen_rtx_PLUS (int_mode, |
38a448ca | 4790 | op0, constm1_rtx), NULL_RTX); |
c7ad039d | 4791 | t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX, |
55c2d311 | 4792 | 0, OPTAB_WIDEN); |
c7ad039d | 4793 | nsign = expand_shift (RSHIFT_EXPR, int_mode, t2, |
76a7314d | 4794 | size - 1, NULL_RTX, 0); |
c7ad039d | 4795 | t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign), |
55c2d311 | 4796 | NULL_RTX); |
c7ad039d | 4797 | t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1, |
55c2d311 TG |
4798 | NULL_RTX, 0); |
4799 | if (t4) | |
4800 | { | |
4801 | rtx t5; | |
c7ad039d | 4802 | t5 = expand_unop (int_mode, one_cmpl_optab, nsign, |
55c2d311 | 4803 | NULL_RTX, 0); |
c7ad039d | 4804 | quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5), |
55c2d311 TG |
4805 | tquotient); |
4806 | } | |
4807 | } | |
4808 | } | |
4809 | ||
4810 | if (quotient != 0) | |
4811 | break; | |
4812 | delete_insns_since (last); | |
4813 | ||
4814 | /* Try using an instruction that produces both the quotient and | |
4815 | remainder, using truncation. We can easily compensate the quotient | |
4816 | or remainder to get floor rounding, once we have the remainder. | |
4817 | Notice that we compute also the final remainder value here, | |
4818 | and return the result right away. */ | |
a45cf58c | 4819 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4820 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4821 | |
55c2d311 TG |
4822 | if (rem_flag) |
4823 | { | |
668443c9 | 4824 | remainder |
f8cfc6aa | 4825 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4826 | quotient = gen_reg_rtx (compute_mode); |
4827 | } | |
4828 | else | |
4829 | { | |
668443c9 | 4830 | quotient |
f8cfc6aa | 4831 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4832 | remainder = gen_reg_rtx (compute_mode); |
4833 | } | |
4834 | ||
4835 | if (expand_twoval_binop (sdivmod_optab, op0, op1, | |
4836 | quotient, remainder, 0)) | |
4837 | { | |
4838 | /* This could be computed with a branch-less sequence. | |
4839 | Save that for later. */ | |
4840 | rtx tem; | |
f3f6fb16 | 4841 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 | 4842 | do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); |
55c2d311 TG |
4843 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4844 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4845 | do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); |
55c2d311 TG |
4846 | expand_dec (quotient, const1_rtx); |
4847 | expand_inc (remainder, op1); | |
4848 | emit_label (label); | |
c8dbc8ca | 4849 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4850 | } |
4851 | ||
4852 | /* No luck with division elimination or divmod. Have to do it | |
4853 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4854 | { |
f3f6fb16 | 4855 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
55c2d311 TG |
4856 | rtx adjusted_op0; |
4857 | rtx tem; | |
4858 | ||
4859 | quotient = gen_reg_rtx (compute_mode); | |
4860 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4861 | label1 = gen_label_rtx (); | |
4862 | label2 = gen_label_rtx (); | |
4863 | label3 = gen_label_rtx (); | |
4864 | label4 = gen_label_rtx (); | |
4865 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4866 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4867 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); | |
55c2d311 | 4868 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
e34153b0 | 4869 | quotient, 0, methods); |
55c2d311 TG |
4870 | if (tem != quotient) |
4871 | emit_move_insn (quotient, tem); | |
ec4a505f | 4872 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
4873 | emit_barrier (); |
4874 | emit_label (label1); | |
44037a66 | 4875 | expand_inc (adjusted_op0, const1_rtx); |
ec4a505f | 4876 | emit_jump_insn (targetm.gen_jump (label4)); |
55c2d311 TG |
4877 | emit_barrier (); |
4878 | emit_label (label2); | |
f5963e61 | 4879 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); |
55c2d311 | 4880 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
e34153b0 | 4881 | quotient, 0, methods); |
55c2d311 TG |
4882 | if (tem != quotient) |
4883 | emit_move_insn (quotient, tem); | |
ec4a505f | 4884 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
4885 | emit_barrier (); |
4886 | emit_label (label3); | |
4887 | expand_dec (adjusted_op0, const1_rtx); | |
4888 | emit_label (label4); | |
4889 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
e34153b0 | 4890 | quotient, 0, methods); |
55c2d311 TG |
4891 | if (tem != quotient) |
4892 | emit_move_insn (quotient, tem); | |
4893 | expand_dec (quotient, const1_rtx); | |
4894 | emit_label (label5); | |
44037a66 | 4895 | } |
55c2d311 | 4896 | break; |
44037a66 | 4897 | |
55c2d311 TG |
4898 | case CEIL_DIV_EXPR: |
4899 | case CEIL_MOD_EXPR: | |
4900 | if (unsignedp) | |
4901 | { | |
76a7314d JJ |
4902 | if (op1_is_constant |
4903 | && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) | |
c7ad039d | 4904 | && (HWI_COMPUTABLE_MODE_P (compute_mode) |
76a7314d | 4905 | || INTVAL (op1) >= 0)) |
9176af2f | 4906 | { |
c7ad039d RS |
4907 | scalar_int_mode int_mode |
4908 | = as_a <scalar_int_mode> (compute_mode); | |
9176af2f TG |
4909 | rtx t1, t2, t3; |
4910 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
c7ad039d | 4911 | t1 = expand_shift (RSHIFT_EXPR, int_mode, op0, |
eb6c3df1 | 4912 | floor_log2 (d), tquotient, 1); |
c7ad039d RS |
4913 | t2 = expand_binop (int_mode, and_optab, op0, |
4914 | gen_int_mode (d - 1, int_mode), | |
e34153b0 | 4915 | NULL_RTX, 1, methods); |
c7ad039d RS |
4916 | t3 = gen_reg_rtx (int_mode); |
4917 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1); | |
412381d9 TG |
4918 | if (t3 == 0) |
4919 | { | |
f3f6fb16 | 4920 | rtx_code_label *lab; |
412381d9 | 4921 | lab = gen_label_rtx (); |
c7ad039d | 4922 | do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab); |
412381d9 TG |
4923 | expand_inc (t1, const1_rtx); |
4924 | emit_label (lab); | |
4925 | quotient = t1; | |
4926 | } | |
4927 | else | |
c7ad039d | 4928 | quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3), |
412381d9 | 4929 | tquotient); |
9176af2f TG |
4930 | break; |
4931 | } | |
55c2d311 TG |
4932 | |
4933 | /* Try using an instruction that produces both the quotient and | |
4934 | remainder, using truncation. We can easily compensate the | |
4935 | quotient or remainder to get ceiling rounding, once we have the | |
4936 | remainder. Notice that we compute also the final remainder | |
4937 | value here, and return the result right away. */ | |
a45cf58c | 4938 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4939 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4940 | |
55c2d311 TG |
4941 | if (rem_flag) |
4942 | { | |
f8cfc6aa | 4943 | remainder = (REG_P (target) |
668443c9 | 4944 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4945 | quotient = gen_reg_rtx (compute_mode); |
4946 | } | |
4947 | else | |
4948 | { | |
f8cfc6aa | 4949 | quotient = (REG_P (target) |
668443c9 | 4950 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4951 | remainder = gen_reg_rtx (compute_mode); |
4952 | } | |
4953 | ||
4954 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, | |
4955 | remainder, 1)) | |
4956 | { | |
4957 | /* This could be computed with a branch-less sequence. | |
4958 | Save that for later. */ | |
f3f6fb16 | 4959 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 JL |
4960 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4961 | compute_mode, label); | |
55c2d311 TG |
4962 | expand_inc (quotient, const1_rtx); |
4963 | expand_dec (remainder, op1); | |
4964 | emit_label (label); | |
c8dbc8ca | 4965 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4966 | } |
4967 | ||
4968 | /* No luck with division elimination or divmod. Have to do it | |
4969 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4970 | { |
f3f6fb16 | 4971 | rtx_code_label *label1, *label2; |
55c2d311 TG |
4972 | rtx adjusted_op0, tem; |
4973 | ||
4974 | quotient = gen_reg_rtx (compute_mode); | |
4975 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4976 | label1 = gen_label_rtx (); | |
4977 | label2 = gen_label_rtx (); | |
f5963e61 JL |
4978 | do_cmp_and_jump (adjusted_op0, const0_rtx, NE, |
4979 | compute_mode, label1); | |
55c2d311 | 4980 | emit_move_insn (quotient, const0_rtx); |
ec4a505f | 4981 | emit_jump_insn (targetm.gen_jump (label2)); |
55c2d311 TG |
4982 | emit_barrier (); |
4983 | emit_label (label1); | |
4984 | expand_dec (adjusted_op0, const1_rtx); | |
4985 | tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, | |
e34153b0 | 4986 | quotient, 1, methods); |
55c2d311 TG |
4987 | if (tem != quotient) |
4988 | emit_move_insn (quotient, tem); | |
4989 | expand_inc (quotient, const1_rtx); | |
4990 | emit_label (label2); | |
44037a66 | 4991 | } |
55c2d311 TG |
4992 | } |
4993 | else /* signed */ | |
4994 | { | |
73f27728 RK |
4995 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
4996 | && INTVAL (op1) >= 0) | |
4997 | { | |
4998 | /* This is extremely similar to the code for the unsigned case | |
4999 | above. For 2.7 we should merge these variants, but for | |
5000 | 2.6.1 I don't want to touch the code for unsigned since that | |
5001 | get used in C. The signed case will only be used by other | |
5002 | languages (Ada). */ | |
5003 | ||
5004 | rtx t1, t2, t3; | |
5005 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
5006 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 5007 | floor_log2 (d), tquotient, 0); |
73f27728 | 5008 | t2 = expand_binop (compute_mode, and_optab, op0, |
2f1cd2eb | 5009 | gen_int_mode (d - 1, compute_mode), |
e34153b0 | 5010 | NULL_RTX, 1, methods); |
73f27728 RK |
5011 | t3 = gen_reg_rtx (compute_mode); |
5012 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
5013 | compute_mode, 1, 1); | |
5014 | if (t3 == 0) | |
5015 | { | |
f3f6fb16 | 5016 | rtx_code_label *lab; |
73f27728 | 5017 | lab = gen_label_rtx (); |
f5963e61 | 5018 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
73f27728 RK |
5019 | expand_inc (t1, const1_rtx); |
5020 | emit_label (lab); | |
5021 | quotient = t1; | |
5022 | } | |
5023 | else | |
38a448ca RH |
5024 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
5025 | t1, t3), | |
73f27728 RK |
5026 | tquotient); |
5027 | break; | |
5028 | } | |
5029 | ||
55c2d311 TG |
5030 | /* Try using an instruction that produces both the quotient and |
5031 | remainder, using truncation. We can easily compensate the | |
5032 | quotient or remainder to get ceiling rounding, once we have the | |
5033 | remainder. Notice that we compute also the final remainder | |
5034 | value here, and return the result right away. */ | |
a45cf58c | 5035 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 TG |
5036 | target = gen_reg_rtx (compute_mode); |
5037 | if (rem_flag) | |
5038 | { | |
f8cfc6aa | 5039 | remainder= (REG_P (target) |
668443c9 | 5040 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
5041 | quotient = gen_reg_rtx (compute_mode); |
5042 | } | |
5043 | else | |
5044 | { | |
f8cfc6aa | 5045 | quotient = (REG_P (target) |
668443c9 | 5046 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
5047 | remainder = gen_reg_rtx (compute_mode); |
5048 | } | |
5049 | ||
5050 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, | |
5051 | remainder, 0)) | |
5052 | { | |
5053 | /* This could be computed with a branch-less sequence. | |
5054 | Save that for later. */ | |
5055 | rtx tem; | |
f3f6fb16 | 5056 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 JL |
5057 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
5058 | compute_mode, label); | |
55c2d311 TG |
5059 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
5060 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 5061 | do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); |
55c2d311 TG |
5062 | expand_inc (quotient, const1_rtx); |
5063 | expand_dec (remainder, op1); | |
5064 | emit_label (label); | |
c8dbc8ca | 5065 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
5066 | } |
5067 | ||
5068 | /* No luck with division elimination or divmod. Have to do it | |
5069 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 5070 | { |
f3f6fb16 | 5071 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
55c2d311 TG |
5072 | rtx adjusted_op0; |
5073 | rtx tem; | |
5074 | ||
5075 | quotient = gen_reg_rtx (compute_mode); | |
5076 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
5077 | label1 = gen_label_rtx (); | |
5078 | label2 = gen_label_rtx (); | |
5079 | label3 = gen_label_rtx (); | |
5080 | label4 = gen_label_rtx (); | |
5081 | label5 = gen_label_rtx (); | |
f5963e61 JL |
5082 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
5083 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, | |
5084 | compute_mode, label1); | |
55c2d311 | 5085 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
e34153b0 | 5086 | quotient, 0, methods); |
55c2d311 TG |
5087 | if (tem != quotient) |
5088 | emit_move_insn (quotient, tem); | |
ec4a505f | 5089 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
5090 | emit_barrier (); |
5091 | emit_label (label1); | |
5092 | expand_dec (adjusted_op0, const1_rtx); | |
ec4a505f | 5093 | emit_jump_insn (targetm.gen_jump (label4)); |
55c2d311 TG |
5094 | emit_barrier (); |
5095 | emit_label (label2); | |
f5963e61 JL |
5096 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, |
5097 | compute_mode, label3); | |
55c2d311 | 5098 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
e34153b0 | 5099 | quotient, 0, methods); |
55c2d311 TG |
5100 | if (tem != quotient) |
5101 | emit_move_insn (quotient, tem); | |
ec4a505f | 5102 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
5103 | emit_barrier (); |
5104 | emit_label (label3); | |
5105 | expand_inc (adjusted_op0, const1_rtx); | |
5106 | emit_label (label4); | |
5107 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
e34153b0 | 5108 | quotient, 0, methods); |
55c2d311 TG |
5109 | if (tem != quotient) |
5110 | emit_move_insn (quotient, tem); | |
5111 | expand_inc (quotient, const1_rtx); | |
5112 | emit_label (label5); | |
44037a66 | 5113 | } |
55c2d311 TG |
5114 | } |
5115 | break; | |
bc1c7e93 | 5116 | |
55c2d311 | 5117 | case EXACT_DIV_EXPR: |
c7ad039d | 5118 | if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode)) |
55c2d311 | 5119 | { |
c7ad039d RS |
5120 | scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); |
5121 | int size = GET_MODE_BITSIZE (int_mode); | |
55c2d311 TG |
5122 | HOST_WIDE_INT d = INTVAL (op1); |
5123 | unsigned HOST_WIDE_INT ml; | |
91ce572a | 5124 | int pre_shift; |
55c2d311 TG |
5125 | rtx t1; |
5126 | ||
146ec50f | 5127 | pre_shift = ctz_or_zero (d); |
91ce572a | 5128 | ml = invert_mod2n (d >> pre_shift, size); |
c7ad039d | 5129 | t1 = expand_shift (RSHIFT_EXPR, int_mode, op0, |
eb6c3df1 | 5130 | pre_shift, NULL_RTX, unsignedp); |
c7ad039d | 5131 | quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode), |
31ff3e0b | 5132 | NULL_RTX, 1); |
55c2d311 TG |
5133 | |
5134 | insn = get_last_insn (); | |
7543f918 JR |
5135 | set_dst_reg_note (insn, REG_EQUAL, |
5136 | gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, | |
c7ad039d | 5137 | int_mode, op0, op1), |
7543f918 | 5138 | quotient); |
55c2d311 TG |
5139 | } |
5140 | break; | |
5141 | ||
5142 | case ROUND_DIV_EXPR: | |
5143 | case ROUND_MOD_EXPR: | |
69f61901 RK |
5144 | if (unsignedp) |
5145 | { | |
c7ad039d | 5146 | scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); |
69f61901 | 5147 | rtx tem; |
f3f6fb16 | 5148 | rtx_code_label *label; |
69f61901 | 5149 | label = gen_label_rtx (); |
c7ad039d RS |
5150 | quotient = gen_reg_rtx (int_mode); |
5151 | remainder = gen_reg_rtx (int_mode); | |
69f61901 RK |
5152 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) |
5153 | { | |
5154 | rtx tem; | |
c7ad039d | 5155 | quotient = expand_binop (int_mode, udiv_optab, op0, op1, |
e34153b0 | 5156 | quotient, 1, methods); |
c7ad039d RS |
5157 | tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1); |
5158 | remainder = expand_binop (int_mode, sub_optab, op0, tem, | |
e34153b0 | 5159 | remainder, 1, methods); |
69f61901 | 5160 | } |
c7ad039d RS |
5161 | tem = plus_constant (int_mode, op1, -1); |
5162 | tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1); | |
5163 | do_cmp_and_jump (remainder, tem, LEU, int_mode, label); | |
69f61901 RK |
5164 | expand_inc (quotient, const1_rtx); |
5165 | expand_dec (remainder, op1); | |
5166 | emit_label (label); | |
5167 | } | |
5168 | else | |
5169 | { | |
c7ad039d RS |
5170 | scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); |
5171 | int size = GET_MODE_BITSIZE (int_mode); | |
69f61901 | 5172 | rtx abs_rem, abs_op1, tem, mask; |
f3f6fb16 | 5173 | rtx_code_label *label; |
69f61901 | 5174 | label = gen_label_rtx (); |
c7ad039d RS |
5175 | quotient = gen_reg_rtx (int_mode); |
5176 | remainder = gen_reg_rtx (int_mode); | |
69f61901 RK |
5177 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) |
5178 | { | |
5179 | rtx tem; | |
c7ad039d | 5180 | quotient = expand_binop (int_mode, sdiv_optab, op0, op1, |
e34153b0 | 5181 | quotient, 0, methods); |
c7ad039d RS |
5182 | tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0); |
5183 | remainder = expand_binop (int_mode, sub_optab, op0, tem, | |
e34153b0 | 5184 | remainder, 0, methods); |
69f61901 | 5185 | } |
c7ad039d RS |
5186 | abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0); |
5187 | abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0); | |
5188 | tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem, | |
eb6c3df1 | 5189 | 1, NULL_RTX, 1); |
c7ad039d RS |
5190 | do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label); |
5191 | tem = expand_binop (int_mode, xor_optab, op0, op1, | |
69f61901 | 5192 | NULL_RTX, 0, OPTAB_WIDEN); |
c7ad039d | 5193 | mask = expand_shift (RSHIFT_EXPR, int_mode, tem, |
eb6c3df1 | 5194 | size - 1, NULL_RTX, 0); |
c7ad039d | 5195 | tem = expand_binop (int_mode, xor_optab, mask, const1_rtx, |
69f61901 | 5196 | NULL_RTX, 0, OPTAB_WIDEN); |
c7ad039d | 5197 | tem = expand_binop (int_mode, sub_optab, tem, mask, |
69f61901 RK |
5198 | NULL_RTX, 0, OPTAB_WIDEN); |
5199 | expand_inc (quotient, tem); | |
c7ad039d | 5200 | tem = expand_binop (int_mode, xor_optab, mask, op1, |
69f61901 | 5201 | NULL_RTX, 0, OPTAB_WIDEN); |
c7ad039d | 5202 | tem = expand_binop (int_mode, sub_optab, tem, mask, |
69f61901 RK |
5203 | NULL_RTX, 0, OPTAB_WIDEN); |
5204 | expand_dec (remainder, tem); | |
5205 | emit_label (label); | |
5206 | } | |
5207 | return gen_lowpart (mode, rem_flag ? remainder : quotient); | |
c410d49e | 5208 | |
e9a25f70 | 5209 | default: |
5b0264cb | 5210 | gcc_unreachable (); |
55c2d311 | 5211 | } |
44037a66 | 5212 | |
55c2d311 | 5213 | if (quotient == 0) |
44037a66 | 5214 | { |
a45cf58c RK |
5215 | if (target && GET_MODE (target) != compute_mode) |
5216 | target = 0; | |
5217 | ||
55c2d311 | 5218 | if (rem_flag) |
44037a66 | 5219 | { |
32fdf36b | 5220 | /* Try to produce the remainder without producing the quotient. |
d6a7951f | 5221 | If we seem to have a divmod pattern that does not require widening, |
b20b352b | 5222 | don't try widening here. We should really have a WIDEN argument |
32fdf36b TG |
5223 | to expand_twoval_binop, since what we'd really like to do here is |
5224 | 1) try a mod insn in compute_mode | |
5225 | 2) try a divmod insn in compute_mode | |
5226 | 3) try a div insn in compute_mode and multiply-subtract to get | |
5227 | remainder | |
5228 | 4) try the same things with widening allowed. */ | |
5229 | remainder | |
5230 | = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
5231 | op0, op1, target, | |
5232 | unsignedp, | |
947131ba | 5233 | ((optab_handler (optab2, compute_mode) |
32fdf36b TG |
5234 | != CODE_FOR_nothing) |
5235 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
55c2d311 | 5236 | if (remainder == 0) |
44037a66 TG |
5237 | { |
5238 | /* No luck there. Can we do remainder and divide at once | |
5239 | without a library call? */ | |
55c2d311 TG |
5240 | remainder = gen_reg_rtx (compute_mode); |
5241 | if (! expand_twoval_binop ((unsignedp | |
5242 | ? udivmod_optab | |
5243 | : sdivmod_optab), | |
5244 | op0, op1, | |
5245 | NULL_RTX, remainder, unsignedp)) | |
5246 | remainder = 0; | |
44037a66 | 5247 | } |
55c2d311 TG |
5248 | |
5249 | if (remainder) | |
5250 | return gen_lowpart (mode, remainder); | |
44037a66 | 5251 | } |
44037a66 | 5252 | |
dc38b292 RK |
5253 | /* Produce the quotient. Try a quotient insn, but not a library call. |
5254 | If we have a divmod in this mode, use it in preference to widening | |
5255 | the div (for this test we assume it will not fail). Note that optab2 | |
5256 | is set to the one of the two optabs that the call below will use. */ | |
5257 | quotient | |
5258 | = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, | |
5259 | op0, op1, rem_flag ? NULL_RTX : target, | |
5260 | unsignedp, | |
947131ba | 5261 | ((optab_handler (optab2, compute_mode) |
dc38b292 RK |
5262 | != CODE_FOR_nothing) |
5263 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
5264 | ||
55c2d311 | 5265 | if (quotient == 0) |
44037a66 TG |
5266 | { |
5267 | /* No luck there. Try a quotient-and-remainder insn, | |
5268 | keeping the quotient alone. */ | |
55c2d311 | 5269 | quotient = gen_reg_rtx (compute_mode); |
44037a66 | 5270 | if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, |
55c2d311 TG |
5271 | op0, op1, |
5272 | quotient, NULL_RTX, unsignedp)) | |
5273 | { | |
5274 | quotient = 0; | |
5275 | if (! rem_flag) | |
5276 | /* Still no luck. If we are not computing the remainder, | |
5277 | use a library call for the quotient. */ | |
5278 | quotient = sign_expand_binop (compute_mode, | |
5279 | udiv_optab, sdiv_optab, | |
5280 | op0, op1, target, | |
e34153b0 | 5281 | unsignedp, methods); |
55c2d311 | 5282 | } |
44037a66 | 5283 | } |
44037a66 TG |
5284 | } |
5285 | ||
44037a66 TG |
5286 | if (rem_flag) |
5287 | { | |
a45cf58c RK |
5288 | if (target && GET_MODE (target) != compute_mode) |
5289 | target = 0; | |
5290 | ||
55c2d311 | 5291 | if (quotient == 0) |
b3f8d95d MM |
5292 | { |
5293 | /* No divide instruction either. Use library for remainder. */ | |
5294 | remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
5295 | op0, op1, target, | |
e34153b0 | 5296 | unsignedp, methods); |
b3f8d95d MM |
5297 | /* No remainder function. Try a quotient-and-remainder |
5298 | function, keeping the remainder. */ | |
e34153b0 JJ |
5299 | if (!remainder |
5300 | && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN)) | |
b3f8d95d MM |
5301 | { |
5302 | remainder = gen_reg_rtx (compute_mode); | |
b8698a0f | 5303 | if (!expand_twoval_binop_libfunc |
b3f8d95d MM |
5304 | (unsignedp ? udivmod_optab : sdivmod_optab, |
5305 | op0, op1, | |
5306 | NULL_RTX, remainder, | |
5307 | unsignedp ? UMOD : MOD)) | |
5308 | remainder = NULL_RTX; | |
5309 | } | |
5310 | } | |
44037a66 TG |
5311 | else |
5312 | { | |
5313 | /* We divided. Now finish doing X - Y * (X / Y). */ | |
55c2d311 TG |
5314 | remainder = expand_mult (compute_mode, quotient, op1, |
5315 | NULL_RTX, unsignedp); | |
5316 | remainder = expand_binop (compute_mode, sub_optab, op0, | |
5317 | remainder, target, unsignedp, | |
e34153b0 | 5318 | methods); |
44037a66 TG |
5319 | } |
5320 | } | |
5321 | ||
e34153b0 JJ |
5322 | if (methods != OPTAB_LIB_WIDEN |
5323 | && (rem_flag ? remainder : quotient) == NULL_RTX) | |
5324 | return NULL_RTX; | |
5325 | ||
55c2d311 | 5326 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
44037a66 TG |
5327 | } |
5328 | \f | |
5329 | /* Return a tree node with data type TYPE, describing the value of X. | |
4dfa0342 | 5330 | Usually this is an VAR_DECL, if there is no obvious better choice. |
44037a66 | 5331 | X may be an expression, however we only support those expressions |
6d2f8887 | 5332 | generated by loop.c. */ |
44037a66 TG |
5333 | |
5334 | tree | |
502b8322 | 5335 | make_tree (tree type, rtx x) |
44037a66 TG |
5336 | { |
5337 | tree t; | |
5338 | ||
5339 | switch (GET_CODE (x)) | |
5340 | { | |
5341 | case CONST_INT: | |
807e902e | 5342 | case CONST_WIDE_INT: |
f079167a | 5343 | t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type))); |
807e902e | 5344 | return t; |
b8698a0f | 5345 | |
44037a66 | 5346 | case CONST_DOUBLE: |
807e902e KZ |
5347 | STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT); |
5348 | if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode) | |
5349 | t = wide_int_to_tree (type, | |
5350 | wide_int::from_array (&CONST_DOUBLE_LOW (x), 2, | |
5351 | HOST_BITS_PER_WIDE_INT * 2)); | |
44037a66 | 5352 | else |
34a72c33 | 5353 | t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x)); |
44037a66 TG |
5354 | |
5355 | return t; | |
69ef87e2 AH |
5356 | |
5357 | case CONST_VECTOR: | |
5358 | { | |
16c78b66 RS |
5359 | unsigned int npatterns = CONST_VECTOR_NPATTERNS (x); |
5360 | unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); | |
b8b7f162 | 5361 | tree itype = TREE_TYPE (type); |
69ef87e2 | 5362 | |
69ef87e2 | 5363 | /* Build a tree with vector elements. */ |
16c78b66 RS |
5364 | tree_vector_builder elts (type, npatterns, nelts_per_pattern); |
5365 | unsigned int count = elts.encoded_nelts (); | |
5366 | for (unsigned int i = 0; i < count; ++i) | |
69ef87e2 | 5367 | { |
b8b7f162 | 5368 | rtx elt = CONST_VECTOR_ELT (x, i); |
794e3180 | 5369 | elts.quick_push (make_tree (itype, elt)); |
69ef87e2 | 5370 | } |
c410d49e | 5371 | |
5ebaa477 | 5372 | return elts.build (); |
69ef87e2 AH |
5373 | } |
5374 | ||
44037a66 | 5375 | case PLUS: |
4845b383 KH |
5376 | return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5377 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5378 | |
44037a66 | 5379 | case MINUS: |
4845b383 KH |
5380 | return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5381 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5382 | |
44037a66 | 5383 | case NEG: |
4845b383 | 5384 | return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); |
44037a66 TG |
5385 | |
5386 | case MULT: | |
4845b383 KH |
5387 | return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5388 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5389 | |
44037a66 | 5390 | case ASHIFT: |
4845b383 KH |
5391 | return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5392 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5393 | |
44037a66 | 5394 | case LSHIFTRT: |
ca5ba2a3 | 5395 | t = unsigned_type_for (type); |
aeba6c28 JM |
5396 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5397 | make_tree (t, XEXP (x, 0)), | |
5398 | make_tree (type, XEXP (x, 1)))); | |
c410d49e | 5399 | |
44037a66 | 5400 | case ASHIFTRT: |
12753674 | 5401 | t = signed_type_for (type); |
aeba6c28 JM |
5402 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5403 | make_tree (t, XEXP (x, 0)), | |
5404 | make_tree (type, XEXP (x, 1)))); | |
c410d49e | 5405 | |
44037a66 TG |
5406 | case DIV: |
5407 | if (TREE_CODE (type) != REAL_TYPE) | |
12753674 | 5408 | t = signed_type_for (type); |
44037a66 TG |
5409 | else |
5410 | t = type; | |
5411 | ||
aeba6c28 JM |
5412 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5413 | make_tree (t, XEXP (x, 0)), | |
5414 | make_tree (t, XEXP (x, 1)))); | |
44037a66 | 5415 | case UDIV: |
ca5ba2a3 | 5416 | t = unsigned_type_for (type); |
aeba6c28 JM |
5417 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5418 | make_tree (t, XEXP (x, 0)), | |
5419 | make_tree (t, XEXP (x, 1)))); | |
5c45425b RH |
5420 | |
5421 | case SIGN_EXTEND: | |
5422 | case ZERO_EXTEND: | |
ae2bcd98 RS |
5423 | t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), |
5424 | GET_CODE (x) == ZERO_EXTEND); | |
aeba6c28 | 5425 | return fold_convert (type, make_tree (t, XEXP (x, 0))); |
5c45425b | 5426 | |
84816907 | 5427 | case CONST: |
2072a319 | 5428 | return make_tree (type, XEXP (x, 0)); |
84816907 JM |
5429 | |
5430 | case SYMBOL_REF: | |
5431 | t = SYMBOL_REF_DECL (x); | |
5432 | if (t) | |
5433 | return fold_convert (type, build_fold_addr_expr (t)); | |
191816a3 | 5434 | /* fall through. */ |
84816907 | 5435 | |
4dfa0342 | 5436 | default: |
36fd6408 RS |
5437 | if (CONST_POLY_INT_P (x)) |
5438 | return wide_int_to_tree (t, const_poly_int_value (x)); | |
5439 | ||
c2255bc4 | 5440 | t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); |
d1608933 | 5441 | |
d4ebfa65 BE |
5442 | /* If TYPE is a POINTER_TYPE, we might need to convert X from |
5443 | address mode to pointer mode. */ | |
5ae6cd0d | 5444 | if (POINTER_TYPE_P (type)) |
d4ebfa65 | 5445 | x = convert_memory_address_addr_space |
7a504f33 | 5446 | (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); |
d1608933 | 5447 | |
8a0aa06e RH |
5448 | /* Note that we do *not* use SET_DECL_RTL here, because we do not |
5449 | want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ | |
820cc88f | 5450 | t->decl_with_rtl.rtl = x; |
4dfa0342 | 5451 | |
44037a66 TG |
5452 | return t; |
5453 | } | |
5454 | } | |
44037a66 TG |
5455 | \f |
5456 | /* Compute the logical-and of OP0 and OP1, storing it in TARGET | |
5457 | and returning TARGET. | |
5458 | ||
5459 | If TARGET is 0, a pseudo-register or constant is returned. */ | |
5460 | ||
5461 | rtx | |
ef4bddc2 | 5462 | expand_and (machine_mode mode, rtx op0, rtx op1, rtx target) |
44037a66 | 5463 | { |
22273300 | 5464 | rtx tem = 0; |
44037a66 | 5465 | |
22273300 JJ |
5466 | if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) |
5467 | tem = simplify_binary_operation (AND, mode, op0, op1); | |
5468 | if (tem == 0) | |
44037a66 | 5469 | tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); |
44037a66 TG |
5470 | |
5471 | if (target == 0) | |
5472 | target = tem; | |
5473 | else if (tem != target) | |
5474 | emit_move_insn (target, tem); | |
5475 | return target; | |
5476 | } | |
495499da | 5477 | |
/* Helper function for emit_store_flag: try to emit a conditional-store
   ("cstore") insn via ICODE for the comparison X <CODE> Y and normalize
   the result.

   TARGET is the suggested destination (may be NULL, in which case a new
   pseudo is created).  MODE is the mode of X and Y; COMPARE_MODE is the
   mode the comparison is performed in.  UNSIGNEDP is nonzero for an
   unsigned comparison.  NORMALIZEP selects the desired true-value
   encoding (0 means "whatever the insn produces"; otherwise the result
   is normalized so true == NORMALIZEP — presumably 1 or -1; confirm
   against emit_store_flag's contract).  TARGET_MODE is the mode wanted
   for the final result, or VOIDmode to use the insn's natural mode.

   Returns the result rtx, or NULL_RTX (after deleting any partially
   emitted insns) if the operands could not be prepared or the insn
   could not be emitted.  */
rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     machine_mode mode, machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     machine_mode target_mode)
{
  class expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  scalar_int_mode result_mode = targetm.cstore_mode (icode);
  scalar_int_mode int_target_mode;

  /* Remember the insn stream position so we can roll back on failure.  */
  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    int_target_mode = result_mode;
  else
    int_target_mode = as_a <scalar_int_mode> (target_mode);
  if (!target)
    target = gen_reg_rtx (int_target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  /* Operand 0 is the output; 1 the comparison rtx; 2 and 3 its arms.
     When optimizing, let the expander pick the output location.  */
  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     INT_TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
    {
      gcc_assert (GET_MODE_PRECISION (result_mode) != 1
		  || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);

      bool unsignedp = (STORE_FLAG_VALUE >= 0);
      convert_move (target, subtarget, unsignedp);

      op0 = target;
      result_mode = int_target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compile-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    /* The flag value is the sign bit: shift it down to the low bit,
       arithmetically for -1/0, logically for 0/1.  */
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      gcc_assert (STORE_FLAG_VALUE & 1);

      /* Isolate the low bit; negate it if -1/0 is wanted.  */
      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (int_target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5581 | ||
44037a66 | 5582 | |
ef12ae45 PB |
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.

   Emit code that stores the result of comparing OP0 <CODE> OP1 (in MODE,
   unsigned if UNSIGNEDP) into TARGET (or a new pseudo), normalized per
   NORMALIZEP, with the result in TARGET_MODE.  Returns the result rtx,
   or 0 if no non-recursive strategy applied.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   machine_mode mode, int unsignedp, int normalizep,
		   machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;

  if (unsignedp)
    code = unsigned_condition (code);
  /* SCODE is the comparison with the arms swapped, used for the
     reversed-operand retry on floating-point modes below.  */
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  if (CONST_SCALAR_INT_P (op1))
    canonicalize_comparison (mode, &code, &op1);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode)
      && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      rtx tem;
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result: OR detects "any bit set" (vs 0), AND detects
	     "all bits set" (vs -1).  */
	  op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, int_mode,
				      subreg_highpart_offset (word_mode,
							      int_mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  /* Widen to TARGET_MODE; sign- or zero-extend depending on
	     whether the flag value has its sign bit set in word_mode.  */
	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && is_int_mode (mode, &int_mode)
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
    {
      scalar_int_mode int_target_mode;
      subtarget = target;

      if (!target)
	int_target_mode = int_mode;
      else
	{
	  /* If the result is to be wider than OP0, it is best to convert it
	     first.  If it is to be narrower, it is *incorrect* to convert it
	     first.  */
	  int_target_mode = as_a <scalar_int_mode> (target_mode);
	  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
	    {
	      op0 = convert_modes (int_target_mode, int_mode, op0, 0);
	      int_mode = int_target_mode;
	    }
	}

      if (int_target_mode != int_mode)
	subtarget = 0;

      /* For GE, complement A first so that its sign bit becomes the
	 LT-of-complement sign bit.  */
      if (code == GE)
	op0 = expand_unop (int_mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
			    GET_MODE_BITSIZE (int_mode) - 1,
			    subtarget, normalizep != -1);

      if (int_mode != int_target_mode)
	op0 = convert_modes (int_target_mode, int_mode, op0, 0);

      return op0;
    }

  /* Finally, try the target's cstore pattern, widening the comparison
     mode until a supported one is found.  */
  mclass = GET_MODE_CLASS (mode);
  FOR_EACH_MODE_FROM (compare_mode, mode)
    {
      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
				 unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  /* For float modes, also try the swapped comparison, since
	     some targets only handle one operand order.  */
	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}
5779 | ||
8afacf2c RS |
/* Subroutine of emit_store_flag that handles cases in which the operands
   are scalar integers.  SUBTARGET is the target to use for temporary
   operations and TRUEVAL is the value to store when the condition is
   true.  All other arguments are as for emit_store_flag.

   Returns the rtx holding the store-flag result, or 0 if none of the
   integer tricks below apply.  On failure all insns emitted here are
   deleted (the stream is rolled back to LAST).  */

rtx
emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
		     rtx op1, scalar_int_mode mode, int unsignedp,
		     int normalizep, rtx trueval)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  /* Checkpoint for rolling back partially-emitted sequences.  */
  rtx_insn *last = get_last_insn ();

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
			      OPTAB_WIDEN);

      if (tem == 0)
	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
			    OPTAB_WIDEN);
      if (tem != 0)
	tem = emit_store_flag (target, code, tem, const0_rtx,
			       mode, unsignedp, normalizep);
      if (tem != 0)
	return tem;

      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rtx_code rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
	    && code == NE
	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	    && op1 == const0_rtx))
    {
      /* An addition fixes up the result when STORE_FLAG_VALUE and the
	 wanted normalization have opposite unit signs; otherwise a XOR
	 with TRUEVAL flips the flag.  */
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
		       optimize_insn_for_speed_p ()) == 0)
	{
	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, add_optab, tem,
				gen_int_mode (normalizep, target_mode),
				target, 0, OPTAB_WIDEN);
	  if (tem != 0)
	    return tem;
	}
      else if (!want_add
	       && rtx_cost (trueval, mode, XOR, 1,
			    optimize_insn_for_speed_p ()) == 0)
	{
	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
				INTVAL (trueval) >= 0, OPTAB_WIDEN);
	  if (tem != 0)
	    return tem;
	}

      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
	  && (BRANCH_COST (optimize_insn_for_speed_p (),
			   false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  rtx tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      if (tem)
	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			    OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
				GET_MODE_BITSIZE (mode) - 1,
				subtarget, 0);
      if (tem)
	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			    OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
	 that converts the operand into a positive number if it is nonzero
	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
	 for NE we negate.  This puts the result in the sign bit.  Then we
	 normalize with a shift, if needed.

	 Two operations that can do the above actions are ABS and FFS, so try
	 them.  If that doesn't work, and MODE is smaller than a full word,
	 we can use zero-extension to the wider mode (an unsigned conversion)
	 as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	{
	  /* Zero-extend to word_mode; subsequent arithmetic is done in
	     the wider mode.  */
	  tem = convert_modes (word_mode, mode, op0, 1);
	  mode = word_mode;
	}

      if (tem != 0)
	{
	  if (code == EQ)
	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
				0, OPTAB_WIDEN);
	  else
	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
	}

      /* If we couldn't do it that way, for NE we can "or" the two's complement
	 of the value with itself.  For EQ, we take the one's complement of
	 that "or", which is an extra insn, so we only handle EQ if branches
	 are expensive.  */

      if (tem == 0
	  && (code == NE
	      || BRANCH_COST (optimize_insn_for_speed_p (),
			      false) > 1))
	{
	  if (rtx_equal_p (subtarget, op0))
	    subtarget = 0;

	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
			      OPTAB_WIDEN);

	  if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }

  /* The result is currently in the sign bit; shift it down to the low
     bit (logical shift for 0/1, arithmetic shift for 0/-1).  */
  if (tem && normalizep)
    tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
			      GET_MODE_BITSIZE (mode) - 1,
			      subtarget, normalizep == 1);

  /* Copy or convert the result into TARGET when one was supplied;
     otherwise roll back everything we emitted.  */
  if (tem)
    {
      if (!target)
	;
      else if (GET_MODE (tem) != target_mode)
	{
	  convert_move (target, tem, 0);
	  tem = target;
	}
      else if (!subtarget)
	{
	  emit_move_insn (target, tem);
	  tem = target;
	}
    }
  else
    delete_insns_since (last);

  return tem;
}
04a8ee2f | 5989 | |
8afacf2c RS |
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  Normalize is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
		 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  /* First try a direct scc (conditional-store) expansion.  */
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
			   target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
	normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
	;
      else
	return 0;
    }

  /* Checkpoint so failed attempts below can be rolled back.  */
  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
	       && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
	  && (code == ORDERED || code == UNORDERED
	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
	{
	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));

	  /* For the reverse comparison, use either an addition or a XOR.  */
	  if (want_add
	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
			   optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	      if (tem)
		return expand_binop (target_mode, add_optab, tem,
				     gen_int_mode (normalizep, target_mode),
				     target, 0, OPTAB_WIDEN);
	    }
	  else if (!want_add
		   && rtx_cost (trueval, mode, XOR, 1,
				optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	      if (tem)
		return expand_binop (target_mode, xor_optab, tem, trueval,
				     target, INTVAL (trueval) >= 0,
				     OPTAB_WIDEN);
	    }
	}

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
	return 0;

      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
	 Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
	{
	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
				    target_mode);
	}

      if (!HAVE_conditional_move)
	return 0;

      /* Do not turn a trapping comparison into a non-trapping one.  */
      if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
	  && flag_trapping_math)
	return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
	 conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
			       normalizep, target_mode);
      if (tem == 0)
	return 0;

      if (and_them)
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     tem, const0_rtx, GET_MODE (tem), 0);
      else
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
	delete_insns_since (last);
      return tem;
    }

  /* The remaining tricks only apply to integer comparisons.  */

  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode))
    return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
				unsignedp, normalizep, trueval);

  return 0;
}
6150 | ||
04a8ee2f TG |
/* Like emit_store_flag, but always succeeds.

   If emit_store_flag cannot expand the comparison, fall back to a
   compare-and-branch sequence that stores TRUEVAL/FALSEVAL into a
   register.  Always returns the rtx holding the result.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
		       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */
  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      /* TARGET already holds OP0; skip the store when it is zero.  */
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
			       NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  /* We need a scratch register that is not mentioned in either operand,
     since we store into it before the comparison is resolved.  */
  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
	  || code == ORDERED || code == UNORDERED
	  || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	  || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
	rcode = reverse_condition_maybe_unordered (code);
      else
	rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
	  || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
	{
	  /* Use the reversed jump; swap the stored values to compensate.  */
	  falseval = trueval;
	  trueval = const0_rtx;
	  code = rcode;
	}
    }

  /* Store TRUEVAL, then overwrite with FALSEVAL unless the comparison
     holds and the branch skips the second store.  */
  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
			   label, profile_probability::uninitialized ());

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}
ec18e48e VL |
6237 | |
6238 | /* Helper function for canonicalize_cmp_for_target. Swap between inclusive | |
6239 | and exclusive ranges in order to create an equivalent comparison. See | |
6240 | canonicalize_cmp_for_target for the possible cases. */ | |
6241 | ||
6242 | static enum rtx_code | |
6243 | equivalent_cmp_code (enum rtx_code code) | |
6244 | { | |
6245 | switch (code) | |
6246 | { | |
6247 | case GT: | |
6248 | return GE; | |
6249 | case GE: | |
6250 | return GT; | |
6251 | case LT: | |
6252 | return LE; | |
6253 | case LE: | |
6254 | return LT; | |
6255 | case GTU: | |
6256 | return GEU; | |
6257 | case GEU: | |
6258 | return GTU; | |
6259 | case LTU: | |
6260 | return LEU; | |
6261 | case LEU: | |
6262 | return LTU; | |
6263 | ||
6264 | default: | |
6265 | return code; | |
6266 | } | |
6267 | } | |
6268 | ||
/* Choose the more appropriate immediate in scalar integer comparisons.  The
   purpose of this is to end up with an immediate which can be loaded into a
   register in fewer moves, if possible.

   For each integer comparison there exists an equivalent choice:
     i)   a >  b or a >= b + 1
     ii)  a <= b or a <  b + 1
     iii) a >= b or a >  b - 1
     iv)  a <  b or a <= b - 1

   MODE is the mode of the first operand.
   CODE points to the comparison code.
   IMM points to the rtx containing the immediate.  *IMM must satisfy
   CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
   on exit.  */

void
canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
{
  if (!SCALAR_INT_MODE_P (mode))
    return;

  int to_add = 0;
  enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;

  /* Extract the immediate value from the rtx.  */
  wide_int imm_val = rtx_mode_t (*imm, mode);

  /* Strict-vs-nonstrict swaps in cases i/ii add one to the immediate;
     cases iii/iv subtract one.  Other codes have no equivalent form.  */
  if (*code == GT || *code == GTU || *code == LE || *code == LEU)
    to_add = 1;
  else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
    to_add = -1;
  else
    return;

  /* Check for overflow/underflow in the case of signed values and
     wrapping around in the case of unsigned values.  If any occur
     cancel the optimization.  */
  wi::overflow_type overflow = wi::OVF_NONE;
  wide_int imm_modif;

  if (to_add == 1)
    imm_modif = wi::add (imm_val, 1, sgn, &overflow);
  else
    imm_modif = wi::sub (imm_val, 1, sgn, &overflow);

  if (overflow)
    return;

  /* The following creates a pseudo; if we cannot do that, bail out.  */
  if (!can_create_pseudo_p ())
    return;

  /* Build two candidate move insns into a scratch pseudo so their costs
     can be compared.  */
  rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  rtx new_imm = immed_wide_int_const (imm_modif, mode);

  rtx_insn *old_rtx = gen_move_insn (reg, *imm);
  rtx_insn *new_rtx = gen_move_insn (reg, new_imm);

  /* Update the immediate and the code.  */
  if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
    {
      *code = equivalent_cmp_code (*code);
      *imm = new_imm;
    }
}
6335 | ||
6336 | ||
f5963e61 JL |
6337 | \f |
6338 | /* Perform possibly multi-word comparison and conditional jump to LABEL | |
feb04780 RS |
6339 | if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is |
6340 | now a thin wrapper around do_compare_rtx_and_jump. */ | |
f5963e61 JL |
6341 | |
6342 | static void | |
ef4bddc2 | 6343 | do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode, |
f3f6fb16 | 6344 | rtx_code_label *label) |
f5963e61 | 6345 | { |
feb04780 | 6346 | int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); |
1476d1bd | 6347 | do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX, |
357067f2 | 6348 | NULL, label, profile_probability::uninitialized ()); |
f5963e61 | 6349 | } |