]>
Commit | Line | Data |
---|---|---|
44037a66 TG |
1 | /* Medium-level subroutines: convert bit-field store and extract |
2 | and shifts, multiplies and divides to rtl instructions. | |
23a5b65a | 3 | Copyright (C) 1987-2014 Free Software Foundation, Inc. |
44037a66 | 4 | |
1322177d | 5 | This file is part of GCC. |
44037a66 | 6 | |
1322177d LB |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 9 | Software Foundation; either version 3, or (at your option) any later |
1322177d | 10 | version. |
44037a66 | 11 | |
1322177d LB |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
44037a66 TG |
16 | |
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
44037a66 TG |
20 | |
21 | ||
22 | #include "config.h" | |
670ee920 | 23 | #include "system.h" |
4977bab6 ZW |
24 | #include "coretypes.h" |
25 | #include "tm.h" | |
718f9c0f | 26 | #include "diagnostic-core.h" |
44037a66 TG |
27 | #include "rtl.h" |
28 | #include "tree.h" | |
d8a2d370 | 29 | #include "stor-layout.h" |
6baf1cc8 | 30 | #include "tm_p.h" |
44037a66 | 31 | #include "flags.h" |
44037a66 TG |
32 | #include "insn-config.h" |
33 | #include "expr.h" | |
e78d8e51 | 34 | #include "optabs.h" |
44037a66 | 35 | #include "recog.h" |
b0c48229 | 36 | #include "langhooks.h" |
60393bbc AM |
37 | #include "predict.h" |
38 | #include "basic-block.h" | |
6fb5fa3c | 39 | #include "df.h" |
0890b981 | 40 | #include "target.h" |
462f85ce RS |
41 | #include "expmed.h" |
42 | ||
/* Cost tables and other per-target state used by the expmed pass.
   With SWITCHABLE_TARGET, this_target_expmed can be repointed at a
   different instance when the active target changes.  */
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif
44037a66 | 47 | |
502b8322 AJ |
/* Forward declarations of the local bit-field and division helpers
   defined later in this file.  */
static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx);
static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
				     unsigned HOST_WIDE_INT,
				     rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx);
static rtx extract_fixed_bit_field (machine_mode, rtx,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT, rtx, int);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
44037a66 | 73 | |
807e902e KZ |
74 | /* Return a constant integer mask value of mode MODE with BITSIZE ones |
75 | followed by BITPOS zeros, or the complement of that if COMPLEMENT. | |
76 | The mask is truncated if necessary to the width of mode MODE. The | |
77 | mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */ | |
78 | ||
79 | static inline rtx | |
ef4bddc2 | 80 | mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement) |
807e902e KZ |
81 | { |
82 | return immed_wide_int_const | |
83 | (wi::shifted_mask (bitpos, bitsize, complement, | |
84 | GET_MODE_PRECISION (mode)), mode); | |
85 | } | |
86 | ||
/* Test whether a value is zero or a power of two.  Works for any
   unsigned HOST_WIDE_INT: x & (x - 1) clears the lowest set bit, so
   the result is zero exactly when at most one bit was set.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
58b42e19 | 90 | |
/* Scratch RTL expressions reused while seeding the expmed cost tables.
   Each field is built once in init_expmed and then re-moded with
   PUT_MODE for every mode whose costs are being measured, avoiding
   repeated allocation.  */
struct init_expmed_rtl
{
  rtx reg;		/* Pseudo register used as the generic operand.  */
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;		/* Signed division by 32 (a power of two).  */
  rtx smod_32;		/* Signed modulus by 32 (a power of two).  */
  rtx wide_mult;	/* Widening multiply.  */
  rtx wide_lshr;	/* Logical shift right of the widening multiply.  */
  rtx wide_trunc;	/* Truncation back to the narrow mode (highpart).  */
  rtx shift;
  rtx shift_mult;	/* Multiply by a power of two (shift-like).  */
  rtx shift_add;
  rtx shift_sub0;	/* (shift_mult - reg).  */
  rtx shift_sub1;	/* (reg - shift_mult).  */
  rtx zext;
  rtx trunc;

  /* Preallocated constants: pow2[m] is 1<<m, cint[m] is m.  */
  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};
115 | ||
/* Record the cost of converting between FROM_MODE and TO_MODE for
   speed/size tuning SPEED, using the scratch RTL in ALL.  A narrowing
   conversion is costed as a TRUNCATE, anything else as a ZERO_EXTEND
   (sign- and zero-extension are assumed equally expensive).  */
static void
init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
		      machine_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than the "full"
     integer it requires for storage.  In case one doesn't, for
     comparison purposes here, reduce the bit size by one in that
     case.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && exact_log2 (to_size) != -1)
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && exact_log2 (from_size) != -1)
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  /* The shared scratch register must carry the source mode before
     costing the conversion expression.  */
  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
}
143 | ||
84ddb681 RH |
/* Fill in the expmed cost tables for MODE at speed/size tuning SPEED,
   using the scratch RTL expressions in ALL.  Records add/neg/mul/div
   costs, shift and shift-add costs for every useful shift count,
   integer conversion costs, and widening-multiply/highpart costs.  */
static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  /* Re-mode every shared scratch expression for this MODE.  */
  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));

  /* Power-of-two division/modulus is "cheap" if it costs no more than
     a small number of additions.  */
  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
				     <= 4 * add_cost (speed, mode)));

  /* A shift by zero is free; a shift-add/sub by zero degenerates to a
     plain add.  */
  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  /* Cost each shift count from 1 up to the narrower of MAX_BITS_PER_WORD
     and the mode's unit width, patching the shift-count operand of the
     shared expressions in place.  */
  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
    }

  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, mode, mode_from, speed);
    }
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  /* Widening multiply and highpart-multiply costs are measured
	     in the next wider integer mode.  */
	  PUT_MODE (all->zext, wider_mode);
	  PUT_MODE (all->wide_mult, wider_mode);
	  PUT_MODE (all->wide_lshr, wider_mode);
	  XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (all->wide_mult, speed));
	  set_mul_highpart_cost (speed, mode,
				 set_src_cost (all->wide_trunc, speed));
	}
    }
}
224 | ||
/* One-time initialization of the expmed cost tables: build the scratch
   RTL expressions, cost every integer, partial-integer and vector-integer
   mode for both size and speed tuning, reset the synth_mult algorithm
   hash, and free the scratch RTL again.  */
void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  /* Preallocate the shift-count and power-of-two constants reused by
     init_expmed_one_mode.  */
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  /* speed==0 costs for size, speed==1 costs for speed; the RTL profile
     is temporarily forced to match so rtx_cost sees the right tuning.  */
  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  /* Invalidate any cached synth_mult algorithms, since the costs just
     changed (e.g. after a target switch).  */
  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  /* Release the scratch RTL; freed innermost-first so nothing is freed
     while still reachable from a live expression.  */
  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}
307 | ||
308 | /* Return an rtx representing minus the value of X. | |
309 | MODE is the intended mode of the result, | |
310 | useful if X is a CONST_INT. */ | |
311 | ||
312 | rtx | |
ef4bddc2 | 313 | negate_rtx (machine_mode mode, rtx x) |
44037a66 | 314 | { |
a39a7484 RK |
315 | rtx result = simplify_unary_operation (NEG, mode, x, mode); |
316 | ||
fdb5537f | 317 | if (result == 0) |
a39a7484 RK |
318 | result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |
319 | ||
320 | return result; | |
44037a66 | 321 | } |
da920570 | 322 | |
26f8b976 RS |
323 | /* Adjust bitfield memory MEM so that it points to the first unit of mode |
324 | MODE that contains a bitfield of size BITSIZE at bit position BITNUM. | |
325 | If MODE is BLKmode, return a reference to every byte in the bitfield. | |
326 | Set *NEW_BITNUM to the bit position of the field within the new memory. */ | |
327 | ||
328 | static rtx | |
ef4bddc2 | 329 | narrow_bit_field_mem (rtx mem, machine_mode mode, |
26f8b976 RS |
330 | unsigned HOST_WIDE_INT bitsize, |
331 | unsigned HOST_WIDE_INT bitnum, | |
332 | unsigned HOST_WIDE_INT *new_bitnum) | |
333 | { | |
334 | if (mode == BLKmode) | |
335 | { | |
336 | *new_bitnum = bitnum % BITS_PER_UNIT; | |
337 | HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT; | |
338 | HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1) | |
339 | / BITS_PER_UNIT); | |
340 | return adjust_bitfield_address_size (mem, mode, offset, size); | |
341 | } | |
342 | else | |
343 | { | |
344 | unsigned int unit = GET_MODE_BITSIZE (mode); | |
345 | *new_bitnum = bitnum % unit; | |
346 | HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT; | |
347 | return adjust_bitfield_address (mem, mode, offset); | |
348 | } | |
349 | } | |
350 | ||
fcdd52b7 RS |
/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
			      rtx op0, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bitnum,
			      unsigned HOST_WIDE_INT bitregion_start,
			      unsigned HOST_WIDE_INT bitregion_end,
			      machine_mode fieldmode,
			      unsigned HOST_WIDE_INT *new_bitnum)
{
  /* The iterator yields the integer modes that can legally access the
     field, narrowest first.  */
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
				bitregion_end, MEM_ALIGN (op0),
				MEM_VOLATILE_P (op0));
  machine_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
	 any wider modes.  All other things being equal, we prefer to
	 use the widest mode possible because it tends to expose more
	 CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
	{
	  /* Limit the search to the mode required by the corresponding
	     register insertion or extraction instruction, if any.  */
	  machine_mode limit_mode = word_mode;
	  extraction_insn insn;
	  if (get_best_reg_extraction_insn (&insn, pattern,
					    GET_MODE_BITSIZE (best_mode),
					    fieldmode))
	    limit_mode = insn.field_mode;

	  machine_mode wider_mode;
	  while (iter.next_mode (&wider_mode)
		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
	    best_mode = wider_mode;
	}
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
				   new_bitnum);
    }
  return NULL_RTX;
}
401 | ||
bebf0797 RS |
402 | /* Return true if a bitfield of size BITSIZE at bit number BITNUM within |
403 | a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg | |
404 | offset is then BITNUM / BITS_PER_UNIT. */ | |
405 | ||
406 | static bool | |
407 | lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum, | |
408 | unsigned HOST_WIDE_INT bitsize, | |
ef4bddc2 | 409 | machine_mode struct_mode) |
bebf0797 RS |
410 | { |
411 | if (BYTES_BIG_ENDIAN) | |
c1a4d0b5 | 412 | return (bitnum % BITS_PER_UNIT == 0 |
bebf0797 RS |
413 | && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode) |
414 | || (bitnum + bitsize) % BITS_PER_WORD == 0)); | |
415 | else | |
416 | return bitnum % BITS_PER_WORD == 0; | |
417 | } | |
00efe3ea | 418 | |
548cfdc2 | 419 | /* Return true if -fstrict-volatile-bitfields applies to an access of OP0 |
6f4e9cf8 BE |
420 | containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE. |
421 | Return false if the access would touch memory outside the range | |
422 | BITREGION_START to BITREGION_END for conformance to the C++ memory | |
423 | model. */ | |
f5d4f18c SL |
424 | |
425 | static bool | |
426 | strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, | |
427 | unsigned HOST_WIDE_INT bitnum, | |
ef4bddc2 | 428 | machine_mode fieldmode, |
6f4e9cf8 BE |
429 | unsigned HOST_WIDE_INT bitregion_start, |
430 | unsigned HOST_WIDE_INT bitregion_end) | |
f5d4f18c SL |
431 | { |
432 | unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode); | |
433 | ||
434 | /* -fstrict-volatile-bitfields must be enabled and we must have a | |
435 | volatile MEM. */ | |
436 | if (!MEM_P (op0) | |
437 | || !MEM_VOLATILE_P (op0) | |
438 | || flag_strict_volatile_bitfields <= 0) | |
439 | return false; | |
440 | ||
441 | /* Non-integral modes likely only happen with packed structures. | |
442 | Punt. */ | |
443 | if (!SCALAR_INT_MODE_P (fieldmode)) | |
444 | return false; | |
445 | ||
446 | /* The bit size must not be larger than the field mode, and | |
447 | the field mode must not be larger than a word. */ | |
448 | if (bitsize > modesize || modesize > BITS_PER_WORD) | |
449 | return false; | |
450 | ||
451 | /* Check for cases of unaligned fields that must be split. */ | |
452 | if (bitnum % BITS_PER_UNIT + bitsize > modesize | |
453 | || (STRICT_ALIGNMENT | |
454 | && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize)) | |
455 | return false; | |
456 | ||
6f4e9cf8 BE |
457 | /* Check for cases where the C++ memory model applies. */ |
458 | if (bitregion_end != 0 | |
459 | && (bitnum - bitnum % modesize < bitregion_start | |
460 | || bitnum - bitnum % modesize + modesize > bitregion_end)) | |
461 | return false; | |
462 | ||
f5d4f18c SL |
463 | return true; |
464 | } | |
465 | ||
00efe3ea RS |
466 | /* Return true if OP is a memory and if a bitfield of size BITSIZE at |
467 | bit number BITNUM can be treated as a simple value of mode MODE. */ | |
468 | ||
469 | static bool | |
470 | simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, | |
ef4bddc2 | 471 | unsigned HOST_WIDE_INT bitnum, machine_mode mode) |
00efe3ea RS |
472 | { |
473 | return (MEM_P (op0) | |
474 | && bitnum % BITS_PER_UNIT == 0 | |
475 | && bitsize == GET_MODE_BITSIZE (mode) | |
476 | && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) | |
477 | || (bitnum % GET_MODE_ALIGNMENT (mode) == 0 | |
478 | && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode)))); | |
479 | } | |
6d7db3c5 | 480 | \f |
fcdd52b7 RS |
/* Try to use instruction INSV to store VALUE into a field of OP0.
   BITSIZE and BITNUM are as for store_bit_field.  Return true on
   success; on failure, delete any insns emitted since entry and
   return false so the caller can fall back to another strategy.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  machine_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
				 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					 op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (GET_MODE (value) != op_mode)
    {
      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	{
	  /* Optimization: Don't bother really extending VALUE
	     if it has all the bits we will actually use.  However,
	     if we must narrow it, be sure we do it correctly.  */

	  if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
	    {
	      rtx tmp;

	      tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
	      if (! tmp)
		tmp = simplify_gen_subreg (op_mode,
					   force_reg (GET_MODE (value),
						      value1),
					   GET_MODE (value), 0);
	      value1 = tmp;
	    }
	  else
	    value1 = gen_lowpart (op_mode, value1);
	}
      else if (CONST_INT_P (value))
	value1 = gen_int_mode (INTVAL (value), op_mode);
      else
	/* Parse phase is supposed to make VALUE's data type
	   match that of the component reference, which is a type
	   at least as wide as the field; so VALUE should have
	   a mode that corresponds to that type.  */
	gcc_assert (CONSTANT_P (value));
    }

  /* Operands of insv: destination, field width, field position, value.  */
  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
	convert_move (op0, xop0, true);
      return true;
    }
  /* The pattern didn't match; undo anything emitted while preparing
     the operands.  */
  delete_insns_since (last);
  return false;
}
593 | ||
6d7db3c5 RS |
594 | /* A subroutine of store_bit_field, with the same arguments. Return true |
595 | if the operation could be implemented. | |
44037a66 | 596 | |
6d7db3c5 RS |
597 | If FALLBACK_P is true, fall back to store_fixed_bit_field if we have |
598 | no other way of implementing the operation. If FALLBACK_P is false, | |
599 | return false instead. */ | |
600 | ||
601 | static bool | |
602 | store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
1169e45d AH |
603 | unsigned HOST_WIDE_INT bitnum, |
604 | unsigned HOST_WIDE_INT bitregion_start, | |
605 | unsigned HOST_WIDE_INT bitregion_end, | |
ef4bddc2 | 606 | machine_mode fieldmode, |
6d7db3c5 | 607 | rtx value, bool fallback_p) |
44037a66 | 608 | { |
b3694847 | 609 | rtx op0 = str_rtx; |
28526e20 | 610 | rtx orig_value; |
da920570 | 611 | |
44037a66 TG |
612 | while (GET_CODE (op0) == SUBREG) |
613 | { | |
614 | /* The following line once was done only if WORDS_BIG_ENDIAN, | |
615 | but I think that is a mistake. WORDS_BIG_ENDIAN is | |
616 | meaningful at a much higher level; when structures are copied | |
617 | between memory and regs, the higher-numbered regs | |
618 | always get higher addresses. */ | |
495db1a1 AK |
619 | int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))); |
620 | int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0)); | |
bebf0797 | 621 | int byte_offset = 0; |
495db1a1 AK |
622 | |
623 | /* Paradoxical subregs need special handling on big endian machines. */ | |
624 | if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size) | |
625 | { | |
626 | int difference = inner_mode_size - outer_mode_size; | |
627 | ||
628 | if (WORDS_BIG_ENDIAN) | |
629 | byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD; | |
630 | if (BYTES_BIG_ENDIAN) | |
631 | byte_offset += difference % UNITS_PER_WORD; | |
632 | } | |
633 | else | |
634 | byte_offset = SUBREG_BYTE (op0); | |
635 | ||
636 | bitnum += byte_offset * BITS_PER_UNIT; | |
44037a66 TG |
637 | op0 = SUBREG_REG (op0); |
638 | } | |
639 | ||
2c58f7dd RS |
640 | /* No action is needed if the target is a register and if the field |
641 | lies completely outside that register. This can occur if the source | |
642 | code contains an out-of-bounds access to a small array. */ | |
643 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
6d7db3c5 | 644 | return true; |
2c58f7dd | 645 | |
b42271d6 | 646 | /* Use vec_set patterns for inserting parts of vectors whenever |
997404de JH |
647 | available. */ |
648 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
3c0cb5de | 649 | && !MEM_P (op0) |
947131ba | 650 | && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing |
997404de JH |
651 | && fieldmode == GET_MODE_INNER (GET_MODE (op0)) |
652 | && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) | |
653 | && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) | |
654 | { | |
a5c7d693 | 655 | struct expand_operand ops[3]; |
ef4bddc2 RS |
656 | machine_mode outermode = GET_MODE (op0); |
657 | machine_mode innermode = GET_MODE_INNER (outermode); | |
a5c7d693 | 658 | enum insn_code icode = optab_handler (vec_set_optab, outermode); |
997404de | 659 | int pos = bitnum / GET_MODE_BITSIZE (innermode); |
997404de | 660 | |
a5c7d693 RS |
661 | create_fixed_operand (&ops[0], op0); |
662 | create_input_operand (&ops[1], value, innermode); | |
663 | create_integer_operand (&ops[2], pos); | |
664 | if (maybe_expand_insn (icode, 3, ops)) | |
665 | return true; | |
997404de JH |
666 | } |
667 | ||
308ecea0 | 668 | /* If the target is a register, overwriting the entire object, or storing |
bebf0797 RS |
669 | a full-word or multi-word field can be done with just a SUBREG. */ |
670 | if (!MEM_P (op0) | |
671 | && bitsize == GET_MODE_BITSIZE (fieldmode) | |
672 | && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0) | |
673 | || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0))) | |
674 | { | |
675 | /* Use the subreg machinery either to narrow OP0 to the required | |
d8c84975 JJ |
676 | words or to cope with mode punning between equal-sized modes. |
677 | In the latter case, use subreg on the rhs side, not lhs. */ | |
678 | rtx sub; | |
679 | ||
680 | if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) | |
681 | { | |
682 | sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0); | |
683 | if (sub) | |
684 | { | |
685 | emit_move_insn (op0, sub); | |
686 | return true; | |
687 | } | |
688 | } | |
689 | else | |
bebf0797 | 690 | { |
d8c84975 JJ |
691 | sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), |
692 | bitnum / BITS_PER_UNIT); | |
693 | if (sub) | |
694 | { | |
695 | emit_move_insn (sub, value); | |
696 | return true; | |
697 | } | |
bebf0797 RS |
698 | } |
699 | } | |
308ecea0 | 700 | |
bebf0797 | 701 | /* If the target is memory, storing any naturally aligned field can be |
308ecea0 | 702 | done with a simple store. For targets that support fast unaligned |
0b69c29f | 703 | memory, any naturally sized, unit aligned field can be done directly. */ |
00efe3ea | 704 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode)) |
44037a66 | 705 | { |
bebf0797 | 706 | op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT); |
44037a66 | 707 | emit_move_insn (op0, value); |
6d7db3c5 | 708 | return true; |
44037a66 TG |
709 | } |
710 | ||
a8ca7756 JW |
711 | /* Make sure we are playing with integral modes. Pun with subregs |
712 | if we aren't. This must come after the entire register case above, | |
713 | since that case is valid for any mode. The following cases are only | |
714 | valid for integral modes. */ | |
715 | { | |
ef4bddc2 | 716 | machine_mode imode = int_mode_for_mode (GET_MODE (op0)); |
a8ca7756 JW |
717 | if (imode != GET_MODE (op0)) |
718 | { | |
3c0cb5de | 719 | if (MEM_P (op0)) |
e98fc6de | 720 | op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0)); |
a8ca7756 | 721 | else |
5b0264cb NS |
722 | { |
723 | gcc_assert (imode != BLKmode); | |
724 | op0 = gen_lowpart (imode, op0); | |
725 | } | |
a8ca7756 JW |
726 | } |
727 | } | |
728 | ||
44037a66 | 729 | /* Storing an lsb-aligned field in a register |
bebf0797 | 730 | can be done with a movstrict instruction. */ |
44037a66 | 731 | |
3c0cb5de | 732 | if (!MEM_P (op0) |
bebf0797 | 733 | && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) |
44037a66 | 734 | && bitsize == GET_MODE_BITSIZE (fieldmode) |
947131ba | 735 | && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing) |
44037a66 | 736 | { |
a5c7d693 RS |
737 | struct expand_operand ops[2]; |
738 | enum insn_code icode = optab_handler (movstrict_optab, fieldmode); | |
5d560619 | 739 | rtx arg0 = op0; |
19228b93 | 740 | unsigned HOST_WIDE_INT subreg_off; |
5e4900c7 | 741 | |
a5c7d693 | 742 | if (GET_CODE (arg0) == SUBREG) |
44037a66 | 743 | { |
5b0264cb NS |
744 | /* Else we've got some float mode source being extracted into |
745 | a different float mode destination -- this combination of | |
746 | subregs results in Severe Tire Damage. */ | |
a5c7d693 | 747 | gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode |
5b0264cb NS |
748 | || GET_MODE_CLASS (fieldmode) == MODE_INT |
749 | || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); | |
a5c7d693 | 750 | arg0 = SUBREG_REG (arg0); |
5e4900c7 | 751 | } |
470032d7 | 752 | |
bebf0797 | 753 | subreg_off = bitnum / BITS_PER_UNIT; |
19228b93 JJ |
754 | if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)) |
755 | { | |
756 | arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); | |
a5c7d693 | 757 | |
19228b93 JJ |
758 | create_fixed_operand (&ops[0], arg0); |
759 | /* Shrink the source operand to FIELDMODE. */ | |
760 | create_convert_operand_to (&ops[1], value, fieldmode, false); | |
761 | if (maybe_expand_insn (icode, 2, ops)) | |
762 | return true; | |
763 | } | |
44037a66 TG |
764 | } |
765 | ||
766 | /* Handle fields bigger than a word. */ | |
767 | ||
768 | if (bitsize > BITS_PER_WORD) | |
769 | { | |
770 | /* Here we transfer the words of the field | |
771 | in the order least significant first. | |
772 | This is because the most significant word is the one which may | |
ad83e87b PB |
773 | be less than full. |
774 | However, only do that if the value is not BLKmode. */ | |
775 | ||
770ae6cc RK |
776 | unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; |
777 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; | |
778 | unsigned int i; | |
f3f6fb16 | 779 | rtx_insn *last; |
44037a66 TG |
780 | |
781 | /* This is the mode we must force value to, so that there will be enough | |
782 | subwords to extract. Note that fieldmode will often (always?) be | |
783 | VOIDmode, because that is what store_field uses to indicate that this | |
535a42b1 NS |
784 | is a bit field, but passing VOIDmode to operand_subword_force |
785 | is not allowed. */ | |
9f5e2e11 RS |
786 | fieldmode = GET_MODE (value); |
787 | if (fieldmode == VOIDmode) | |
6f83092f | 788 | fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); |
44037a66 | 789 | |
6d7db3c5 | 790 | last = get_last_insn (); |
44037a66 TG |
791 | for (i = 0; i < nwords; i++) |
792 | { | |
ad83e87b PB |
793 | /* If I is 0, use the low-order word in both field and target; |
794 | if I is 1, use the next to lowest word; and so on. */ | |
00d6b19a AB |
795 | unsigned int wordnum = (backwards |
796 | ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD | |
797 | - i - 1 | |
798 | : i); | |
770ae6cc | 799 | unsigned int bit_offset = (backwards |
04050c69 RK |
800 | ? MAX ((int) bitsize - ((int) i + 1) |
801 | * BITS_PER_WORD, | |
802 | 0) | |
803 | : (int) i * BITS_PER_WORD); | |
6d7db3c5 | 804 | rtx value_word = operand_subword_force (value, wordnum, fieldmode); |
3bdb97b8 AK |
805 | unsigned HOST_WIDE_INT new_bitsize = |
806 | MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD); | |
807 | ||
808 | /* If the remaining chunk doesn't have full wordsize we have | |
809 | to make sure that for big endian machines the higher order | |
810 | bits are used. */ | |
811 | if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards) | |
812 | value_word = simplify_expand_binop (word_mode, lshr_optab, | |
813 | value_word, | |
814 | GEN_INT (BITS_PER_WORD | |
815 | - new_bitsize), | |
816 | NULL_RTX, true, | |
817 | OPTAB_LIB_WIDEN); | |
818 | ||
819 | if (!store_bit_field_1 (op0, new_bitsize, | |
1169e45d AH |
820 | bitnum + bit_offset, |
821 | bitregion_start, bitregion_end, | |
822 | word_mode, | |
6d7db3c5 RS |
823 | value_word, fallback_p)) |
824 | { | |
825 | delete_insns_since (last); | |
826 | return false; | |
827 | } | |
44037a66 | 828 | } |
6d7db3c5 | 829 | return true; |
44037a66 TG |
830 | } |
831 | ||
4f1da2e9 RS |
832 | /* If VALUE has a floating-point or complex mode, access it as an |
833 | integer of the corresponding size. This can occur on a machine | |
834 | with 64 bit registers that uses SFmode for float. It can also | |
835 | occur for unaligned float or complex fields. */ | |
28526e20 | 836 | orig_value = value; |
4f1da2e9 RS |
837 | if (GET_MODE (value) != VOIDmode |
838 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT | |
32b069d3 | 839 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) |
4f1da2e9 RS |
840 | { |
841 | value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value))); | |
842 | emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); | |
843 | } | |
2305bcad | 844 | |
bebf0797 RS |
845 | /* If OP0 is a multi-word register, narrow it to the affected word. |
846 | If the region spans two words, defer to store_split_bit_field. */ | |
847 | if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
848 | { | |
849 | op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), | |
850 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); | |
851 | gcc_assert (op0); | |
852 | bitnum %= BITS_PER_WORD; | |
853 | if (bitnum + bitsize > BITS_PER_WORD) | |
854 | { | |
855 | if (!fallback_p) | |
856 | return false; | |
857 | ||
858 | store_split_bit_field (op0, bitsize, bitnum, bitregion_start, | |
859 | bitregion_end, value); | |
860 | return true; | |
861 | } | |
862 | } | |
863 | ||
864 | /* From here on we can assume that the field to be stored in fits | |
865 | within a word. If the destination is a register, it too fits | |
866 | in a word. */ | |
44037a66 | 867 | |
fcdd52b7 RS |
868 | extraction_insn insv; |
869 | if (!MEM_P (op0) | |
870 | && get_best_reg_extraction_insn (&insv, EP_insv, | |
871 | GET_MODE_BITSIZE (GET_MODE (op0)), | |
872 | fieldmode) | |
873 | && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value)) | |
a20556e4 | 874 | return true; |
6d7db3c5 RS |
875 | |
876 | /* If OP0 is a memory, try copying it to a register and seeing if a | |
877 | cheap register alternative is available. */ | |
fcdd52b7 | 878 | if (MEM_P (op0)) |
6d7db3c5 | 879 | { |
f5d4f18c SL |
880 | if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum, |
881 | fieldmode) | |
fcdd52b7 | 882 | && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value)) |
17a73ba0 RS |
883 | return true; |
884 | ||
f3f6fb16 | 885 | rtx_insn *last = get_last_insn (); |
6d7db3c5 | 886 | |
fcdd52b7 RS |
887 | /* Try loading part of OP0 into a register, inserting the bitfield |
888 | into that, and then copying the result back to OP0. */ | |
889 | unsigned HOST_WIDE_INT bitpos; | |
890 | rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum, | |
891 | bitregion_start, bitregion_end, | |
892 | fieldmode, &bitpos); | |
893 | if (xop0) | |
0fb7aeda | 894 | { |
fcdd52b7 | 895 | rtx tempreg = copy_to_reg (xop0); |
bebf0797 | 896 | if (store_bit_field_1 (tempreg, bitsize, bitpos, |
1169e45d | 897 | bitregion_start, bitregion_end, |
6d7db3c5 RS |
898 | fieldmode, orig_value, false)) |
899 | { | |
900 | emit_move_insn (xop0, tempreg); | |
901 | return true; | |
902 | } | |
44037a66 | 903 | delete_insns_since (last); |
44037a66 TG |
904 | } |
905 | } | |
6d7db3c5 RS |
906 | |
907 | if (!fallback_p) | |
908 | return false; | |
909 | ||
bebf0797 RS |
910 | store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start, |
911 | bitregion_end, value); | |
6d7db3c5 RS |
912 | return true; |
913 | } | |
914 | ||
915 | /* Generate code to store value from rtx VALUE | |
916 | into a bit-field within structure STR_RTX | |
917 | containing BITSIZE bits starting at bit BITNUM. | |
1169e45d AH |
918 | |
919 | BITREGION_START is bitpos of the first bitfield in this region. | |
920 | BITREGION_END is the bitpos of the ending bitfield in this region. | |
921 | These two fields are 0, if the C++ memory model does not apply, | |
922 | or we are not interested in keeping track of bitfield regions. | |
923 | ||
6d7db3c5 RS |
924 | FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */ |
925 | ||
926 | void | |
927 | store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
1169e45d AH |
928 | unsigned HOST_WIDE_INT bitnum, |
929 | unsigned HOST_WIDE_INT bitregion_start, | |
930 | unsigned HOST_WIDE_INT bitregion_end, | |
ef4bddc2 | 931 | machine_mode fieldmode, |
6d7db3c5 RS |
932 | rtx value) |
933 | { | |
f5d4f18c | 934 | /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ |
6f4e9cf8 BE |
935 | if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode, |
936 | bitregion_start, bitregion_end)) | |
f5d4f18c | 937 | { |
f5d4f18c SL |
938 | /* Storing any naturally aligned field can be done with a simple |
939 | store. For targets that support fast unaligned memory, any | |
940 | naturally sized, unit aligned field can be done directly. */ | |
941 | if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode)) | |
942 | { | |
943 | str_rtx = adjust_bitfield_address (str_rtx, fieldmode, | |
944 | bitnum / BITS_PER_UNIT); | |
945 | emit_move_insn (str_rtx, value); | |
946 | } | |
947 | else | |
ebb99f96 BE |
948 | { |
949 | str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum, | |
950 | &bitnum); | |
951 | /* Explicitly override the C/C++ memory model; ignore the | |
952 | bit range so that we can do the access in the mode mandated | |
953 | by -fstrict-volatile-bitfields instead. */ | |
548cfdc2 | 954 | store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value); |
ebb99f96 BE |
955 | } |
956 | ||
f5d4f18c SL |
957 | return; |
958 | } | |
959 | ||
1169e45d AH |
960 | /* Under the C++0x memory model, we must not touch bits outside the |
961 | bit region. Adjust the address to start at the beginning of the | |
962 | bit region. */ | |
a59b038c | 963 | if (MEM_P (str_rtx) && bitregion_start > 0) |
1169e45d | 964 | { |
ef4bddc2 | 965 | machine_mode bestmode; |
ee88e690 | 966 | HOST_WIDE_INT offset, size; |
1169e45d | 967 | |
a59b038c EB |
968 | gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0); |
969 | ||
1169e45d AH |
970 | offset = bitregion_start / BITS_PER_UNIT; |
971 | bitnum -= bitregion_start; | |
ee88e690 | 972 | size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT; |
1169e45d AH |
973 | bitregion_end -= bitregion_start; |
974 | bitregion_start = 0; | |
975 | bestmode = get_best_mode (bitsize, bitnum, | |
976 | bitregion_start, bitregion_end, | |
fcdd52b7 | 977 | MEM_ALIGN (str_rtx), VOIDmode, |
1169e45d | 978 | MEM_VOLATILE_P (str_rtx)); |
ee88e690 | 979 | str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size); |
1169e45d AH |
980 | } |
981 | ||
982 | if (!store_bit_field_1 (str_rtx, bitsize, bitnum, | |
983 | bitregion_start, bitregion_end, | |
984 | fieldmode, value, true)) | |
6d7db3c5 | 985 | gcc_unreachable (); |
44037a66 TG |
986 | } |
987 | \f | |
bebf0797 RS |
988 | /* Use shifts and boolean operations to store VALUE into a bit field of |
989 | width BITSIZE in OP0, starting at bit BITNUM. */ | |
44037a66 TG |
990 | |
991 | static void | |
bebf0797 RS |
992 | store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
993 | unsigned HOST_WIDE_INT bitnum, | |
1169e45d AH |
994 | unsigned HOST_WIDE_INT bitregion_start, |
995 | unsigned HOST_WIDE_INT bitregion_end, | |
996 | rtx value) | |
44037a66 | 997 | { |
44037a66 TG |
998 | /* There is a case not handled here: |
999 | a structure with a known alignment of just a halfword | |
1000 | and a field split across two aligned halfwords within the structure. | |
1001 | Or likewise a structure with a known alignment of just a byte | |
1002 | and a field split across two bytes. | |
1003 | Such cases are not supposed to be able to occur. */ | |
1004 | ||
bebf0797 | 1005 | if (MEM_P (op0)) |
44037a66 | 1006 | { |
ef4bddc2 | 1007 | machine_mode mode = GET_MODE (op0); |
053a35af | 1008 | if (GET_MODE_BITSIZE (mode) == 0 |
0fb7aeda KH |
1009 | || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) |
1010 | mode = word_mode; | |
f5d4f18c SL |
1011 | mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end, |
1012 | MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0)); | |
44037a66 TG |
1013 | |
1014 | if (mode == VOIDmode) | |
1015 | { | |
1016 | /* The only way this should occur is if the field spans word | |
1017 | boundaries. */ | |
bebf0797 RS |
1018 | store_split_bit_field (op0, bitsize, bitnum, bitregion_start, |
1019 | bitregion_end, value); | |
44037a66 TG |
1020 | return; |
1021 | } | |
1022 | ||
26f8b976 | 1023 | op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); |
44037a66 TG |
1024 | } |
1025 | ||
ebb99f96 | 1026 | store_fixed_bit_field_1 (op0, bitsize, bitnum, value); |
ebb99f96 BE |
1027 | } |
1028 | ||
1029 | /* Helper function for store_fixed_bit_field, stores | |
1030 | the bit field always using the MODE of OP0. */ | |
1031 | ||
1032 | static void | |
1033 | store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize, | |
548cfdc2 EB |
1034 | unsigned HOST_WIDE_INT bitnum, |
1035 | rtx value) | |
ebb99f96 | 1036 | { |
ef4bddc2 | 1037 | machine_mode mode; |
ebb99f96 BE |
1038 | rtx temp; |
1039 | int all_zero = 0; | |
1040 | int all_one = 0; | |
1041 | ||
44037a66 | 1042 | mode = GET_MODE (op0); |
bebf0797 | 1043 | gcc_assert (SCALAR_INT_MODE_P (mode)); |
44037a66 | 1044 | |
bebf0797 RS |
1045 | /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) |
1046 | for invalid input, such as f5 from gcc.dg/pr48335-2.c. */ | |
44037a66 | 1047 | |
f76b9db2 | 1048 | if (BYTES_BIG_ENDIAN) |
bebf0797 RS |
1049 | /* BITNUM is the distance between our msb |
1050 | and that of the containing datum. | |
1051 | Convert it to the distance from the lsb. */ | |
1052 | bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; | |
44037a66 | 1053 | |
bebf0797 | 1054 | /* Now BITNUM is always the distance between our lsb |
44037a66 TG |
1055 | and that of OP0. */ |
1056 | ||
bebf0797 | 1057 | /* Shift VALUE left by BITNUM bits. If VALUE is not constant, |
44037a66 TG |
1058 | we must first convert its mode to MODE. */ |
1059 | ||
481683e1 | 1060 | if (CONST_INT_P (value)) |
44037a66 | 1061 | { |
e507a433 | 1062 | unsigned HOST_WIDE_INT v = UINTVAL (value); |
44037a66 | 1063 | |
b1ec3c92 | 1064 | if (bitsize < HOST_BITS_PER_WIDE_INT) |
e507a433 | 1065 | v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1; |
44037a66 TG |
1066 | |
1067 | if (v == 0) | |
1068 | all_zero = 1; | |
b1ec3c92 | 1069 | else if ((bitsize < HOST_BITS_PER_WIDE_INT |
e507a433 MP |
1070 | && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1) |
1071 | || (bitsize == HOST_BITS_PER_WIDE_INT | |
1072 | && v == (unsigned HOST_WIDE_INT) -1)) | |
44037a66 TG |
1073 | all_one = 1; |
1074 | ||
088c5368 | 1075 | value = lshift_value (mode, v, bitnum); |
44037a66 TG |
1076 | } |
1077 | else | |
1078 | { | |
1079 | int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize | |
bebf0797 | 1080 | && bitnum + bitsize != GET_MODE_BITSIZE (mode)); |
44037a66 TG |
1081 | |
1082 | if (GET_MODE (value) != mode) | |
86cfb27a | 1083 | value = convert_to_mode (mode, value, 1); |
44037a66 TG |
1084 | |
1085 | if (must_and) | |
1086 | value = expand_binop (mode, and_optab, value, | |
1087 | mask_rtx (mode, 0, bitsize, 0), | |
b1ec3c92 | 1088 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
bebf0797 | 1089 | if (bitnum > 0) |
44037a66 | 1090 | value = expand_shift (LSHIFT_EXPR, mode, value, |
bebf0797 | 1091 | bitnum, NULL_RTX, 1); |
44037a66 TG |
1092 | } |
1093 | ||
1094 | /* Now clear the chosen bits in OP0, | |
1095 | except that if VALUE is -1 we need not bother. */ | |
c505fc06 RS |
1096 | /* We keep the intermediates in registers to allow CSE to combine |
1097 | consecutive bitfield assignments. */ | |
44037a66 | 1098 | |
c505fc06 | 1099 | temp = force_reg (mode, op0); |
44037a66 TG |
1100 | |
1101 | if (! all_one) | |
1102 | { | |
c505fc06 | 1103 | temp = expand_binop (mode, and_optab, temp, |
bebf0797 | 1104 | mask_rtx (mode, bitnum, bitsize, 1), |
c505fc06 RS |
1105 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
1106 | temp = force_reg (mode, temp); | |
44037a66 | 1107 | } |
44037a66 TG |
1108 | |
1109 | /* Now logical-or VALUE into OP0, unless it is zero. */ | |
1110 | ||
1111 | if (! all_zero) | |
c505fc06 RS |
1112 | { |
1113 | temp = expand_binop (mode, ior_optab, temp, value, | |
1114 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
1115 | temp = force_reg (mode, temp); | |
1116 | } | |
1117 | ||
44037a66 | 1118 | if (op0 != temp) |
4679504c UB |
1119 | { |
1120 | op0 = copy_rtx (op0); | |
1121 | emit_move_insn (op0, temp); | |
1122 | } | |
44037a66 TG |
1123 | } |
1124 | \f | |
06c94bce | 1125 | /* Store a bit field that is split across multiple accessible memory objects. |
44037a66 | 1126 | |
06c94bce | 1127 | OP0 is the REG, SUBREG or MEM rtx for the first of the objects. |
44037a66 TG |
1128 | BITSIZE is the field width; BITPOS the position of its first bit |
1129 | (within the word). | |
06c94bce | 1130 | VALUE is the value to store. |
06c94bce RS |
1131 | |
1132 | This does not yet handle fields wider than BITS_PER_WORD. */ | |
44037a66 TG |
1133 | |
1134 | static void | |
502b8322 | 1135 | store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
1169e45d AH |
1136 | unsigned HOST_WIDE_INT bitpos, |
1137 | unsigned HOST_WIDE_INT bitregion_start, | |
1138 | unsigned HOST_WIDE_INT bitregion_end, | |
1139 | rtx value) | |
44037a66 | 1140 | { |
770ae6cc RK |
1141 | unsigned int unit; |
1142 | unsigned int bitsdone = 0; | |
4ee16841 | 1143 | |
0eb61c19 DE |
1144 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
1145 | much at a time. */ | |
f8cfc6aa | 1146 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
4ee16841 DE |
1147 | unit = BITS_PER_WORD; |
1148 | else | |
04050c69 | 1149 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
e54d80d0 | 1150 | |
ebb99f96 BE |
1151 | /* If OP0 is a memory with a mode, then UNIT must not be larger than |
1152 | OP0's mode as well. Otherwise, store_fixed_bit_field will call us | |
1153 | again, and we will mutually recurse forever. */ | |
1154 | if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0) | |
1155 | unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0))); | |
1156 | ||
3d709ff0 RS |
1157 | /* If VALUE is a constant other than a CONST_INT, get it into a register in |
1158 | WORD_MODE. If we can do this using gen_lowpart_common, do so. Note | |
1159 | that VALUE might be a floating-point constant. */ | |
481683e1 | 1160 | if (CONSTANT_P (value) && !CONST_INT_P (value)) |
3d709ff0 RS |
1161 | { |
1162 | rtx word = gen_lowpart_common (word_mode, value); | |
1163 | ||
bc8a0e39 | 1164 | if (word && (value != word)) |
3d709ff0 RS |
1165 | value = word; |
1166 | else | |
1167 | value = gen_lowpart_common (word_mode, | |
d01bc862 DE |
1168 | force_reg (GET_MODE (value) != VOIDmode |
1169 | ? GET_MODE (value) | |
1170 | : word_mode, value)); | |
3d709ff0 | 1171 | } |
44037a66 | 1172 | |
06c94bce | 1173 | while (bitsdone < bitsize) |
44037a66 | 1174 | { |
770ae6cc | 1175 | unsigned HOST_WIDE_INT thissize; |
06c94bce | 1176 | rtx part, word; |
770ae6cc RK |
1177 | unsigned HOST_WIDE_INT thispos; |
1178 | unsigned HOST_WIDE_INT offset; | |
44037a66 | 1179 | |
06c94bce RS |
1180 | offset = (bitpos + bitsdone) / unit; |
1181 | thispos = (bitpos + bitsdone) % unit; | |
44037a66 | 1182 | |
f1cc9589 | 1183 | /* When region of bytes we can touch is restricted, decrease |
bd3647bf JJ |
1184 | UNIT close to the end of the region as needed. If op0 is a REG |
1185 | or SUBREG of REG, don't do this, as there can't be data races | |
1186 | on a register and we can expand shorter code in some cases. */ | |
f1cc9589 JJ |
1187 | if (bitregion_end |
1188 | && unit > BITS_PER_UNIT | |
bd3647bf JJ |
1189 | && bitpos + bitsdone - thispos + unit > bitregion_end + 1 |
1190 | && !REG_P (op0) | |
1191 | && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0)))) | |
f1cc9589 JJ |
1192 | { |
1193 | unit = unit / 2; | |
1194 | continue; | |
1195 | } | |
1196 | ||
0eb61c19 DE |
1197 | /* THISSIZE must not overrun a word boundary. Otherwise, |
1198 | store_fixed_bit_field will call us again, and we will mutually | |
1199 | recurse forever. */ | |
1200 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); | |
1201 | thissize = MIN (thissize, unit - thispos); | |
44037a66 | 1202 | |
f76b9db2 ILT |
1203 | if (BYTES_BIG_ENDIAN) |
1204 | { | |
1205 | /* Fetch successively less significant portions. */ | |
481683e1 | 1206 | if (CONST_INT_P (value)) |
f76b9db2 ILT |
1207 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1208 | >> (bitsize - bitsdone - thissize)) | |
1209 | & (((HOST_WIDE_INT) 1 << thissize) - 1)); | |
1210 | else | |
b8ab7fc8 RS |
1211 | { |
1212 | int total_bits = GET_MODE_BITSIZE (GET_MODE (value)); | |
1213 | /* The args are chosen so that the last part includes the | |
1214 | lsb. Give extract_bit_field the value it needs (with | |
1215 | endianness compensation) to fetch the piece we want. */ | |
1216 | part = extract_fixed_bit_field (word_mode, value, thissize, | |
1217 | total_bits - bitsize + bitsdone, | |
c6285bd7 | 1218 | NULL_RTX, 1); |
b8ab7fc8 | 1219 | } |
f76b9db2 | 1220 | } |
06c94bce | 1221 | else |
f76b9db2 ILT |
1222 | { |
1223 | /* Fetch successively more significant portions. */ | |
481683e1 | 1224 | if (CONST_INT_P (value)) |
f76b9db2 ILT |
1225 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1226 | >> bitsdone) | |
1227 | & (((HOST_WIDE_INT) 1 << thissize) - 1)); | |
1228 | else | |
b8ab7fc8 | 1229 | part = extract_fixed_bit_field (word_mode, value, thissize, |
c6285bd7 | 1230 | bitsdone, NULL_RTX, 1); |
f76b9db2 | 1231 | } |
44037a66 | 1232 | |
06c94bce | 1233 | /* If OP0 is a register, then handle OFFSET here. |
5f57dff0 JW |
1234 | |
1235 | When handling multiword bitfields, extract_bit_field may pass | |
1236 | down a word_mode SUBREG of a larger REG for a bitfield that actually | |
1237 | crosses a word boundary. Thus, for a SUBREG, we must find | |
1238 | the current word starting from the base register. */ | |
1239 | if (GET_CODE (op0) == SUBREG) | |
1240 | { | |
bd3647bf JJ |
1241 | int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) |
1242 | + (offset * unit / BITS_PER_WORD); | |
ef4bddc2 | 1243 | machine_mode sub_mode = GET_MODE (SUBREG_REG (op0)); |
19228b93 JJ |
1244 | if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD) |
1245 | word = word_offset ? const0_rtx : op0; | |
1246 | else | |
1247 | word = operand_subword_force (SUBREG_REG (op0), word_offset, | |
1248 | GET_MODE (SUBREG_REG (op0))); | |
bd3647bf | 1249 | offset &= BITS_PER_WORD / unit - 1; |
5f57dff0 | 1250 | } |
f8cfc6aa | 1251 | else if (REG_P (op0)) |
06c94bce | 1252 | { |
ef4bddc2 | 1253 | machine_mode op0_mode = GET_MODE (op0); |
19228b93 JJ |
1254 | if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD) |
1255 | word = offset ? const0_rtx : op0; | |
1256 | else | |
bd3647bf JJ |
1257 | word = operand_subword_force (op0, offset * unit / BITS_PER_WORD, |
1258 | GET_MODE (op0)); | |
1259 | offset &= BITS_PER_WORD / unit - 1; | |
06c94bce RS |
1260 | } |
1261 | else | |
1262 | word = op0; | |
44037a66 | 1263 | |
bebf0797 | 1264 | /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx, |
19228b93 JJ |
1265 | it is just an out-of-bounds access. Ignore it. */ |
1266 | if (word != const0_rtx) | |
bebf0797 RS |
1267 | store_fixed_bit_field (word, thissize, offset * unit + thispos, |
1268 | bitregion_start, bitregion_end, part); | |
06c94bce RS |
1269 | bitsdone += thissize; |
1270 | } | |
44037a66 TG |
1271 | } |
1272 | \f | |
6d7db3c5 RS |
1273 | /* A subroutine of extract_bit_field_1 that converts return value X |
1274 | to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments | |
1275 | to extract_bit_field. */ | |
44037a66 | 1276 | |
6d7db3c5 | 1277 | static rtx |
ef4bddc2 RS |
1278 | convert_extracted_bit_field (rtx x, machine_mode mode, |
1279 | machine_mode tmode, bool unsignedp) | |
6d7db3c5 RS |
1280 | { |
1281 | if (GET_MODE (x) == tmode || GET_MODE (x) == mode) | |
1282 | return x; | |
44037a66 | 1283 | |
6d7db3c5 RS |
1284 | /* If the x mode is not a scalar integral, first convert to the |
1285 | integer mode of that size and then access it as a floating-point | |
1286 | value via a SUBREG. */ | |
1287 | if (!SCALAR_INT_MODE_P (tmode)) | |
1288 | { | |
ef4bddc2 | 1289 | machine_mode smode; |
44037a66 | 1290 | |
6d7db3c5 RS |
1291 | smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0); |
1292 | x = convert_to_mode (smode, x, unsignedp); | |
1293 | x = force_reg (smode, x); | |
1294 | return gen_lowpart (tmode, x); | |
1295 | } | |
44037a66 | 1296 | |
6d7db3c5 RS |
1297 | return convert_to_mode (tmode, x, unsignedp); |
1298 | } | |
1299 | ||
a20556e4 RS |
1300 | /* Try to use an ext(z)v pattern to extract a field from OP0. |
1301 | Return the extracted value on success, otherwise return null. | |
1302 | EXT_MODE is the mode of the extraction and the other arguments | |
1303 | are as for extract_bit_field. */ | |
1304 | ||
1305 | static rtx | |
fcdd52b7 RS |
1306 | extract_bit_field_using_extv (const extraction_insn *extv, rtx op0, |
1307 | unsigned HOST_WIDE_INT bitsize, | |
a20556e4 RS |
1308 | unsigned HOST_WIDE_INT bitnum, |
1309 | int unsignedp, rtx target, | |
ef4bddc2 | 1310 | machine_mode mode, machine_mode tmode) |
a20556e4 RS |
1311 | { |
1312 | struct expand_operand ops[4]; | |
1313 | rtx spec_target = target; | |
1314 | rtx spec_target_subreg = 0; | |
ef4bddc2 | 1315 | machine_mode ext_mode = extv->field_mode; |
a20556e4 RS |
1316 | unsigned unit = GET_MODE_BITSIZE (ext_mode); |
1317 | ||
1318 | if (bitsize == 0 || unit < bitsize) | |
1319 | return NULL_RTX; | |
1320 | ||
1321 | if (MEM_P (op0)) | |
26f8b976 | 1322 | /* Get a reference to the first byte of the field. */ |
fcdd52b7 RS |
1323 | op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum, |
1324 | &bitnum); | |
a20556e4 RS |
1325 | else |
1326 | { | |
1327 | /* Convert from counting within OP0 to counting in EXT_MODE. */ | |
1328 | if (BYTES_BIG_ENDIAN) | |
1329 | bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0)); | |
1330 | ||
1331 | /* If op0 is a register, we need it in EXT_MODE to make it | |
1332 | acceptable to the format of ext(z)v. */ | |
1333 | if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) | |
1334 | return NULL_RTX; | |
1335 | if (REG_P (op0) && GET_MODE (op0) != ext_mode) | |
1336 | op0 = gen_lowpart_SUBREG (ext_mode, op0); | |
1337 | } | |
1338 | ||
1339 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count | |
1340 | "backwards" from the size of the unit we are extracting from. | |
1341 | Otherwise, we count bits from the most significant on a | |
1342 | BYTES/BITS_BIG_ENDIAN machine. */ | |
1343 | ||
1344 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | |
1345 | bitnum = unit - bitsize - bitnum; | |
1346 | ||
1347 | if (target == 0) | |
1348 | target = spec_target = gen_reg_rtx (tmode); | |
1349 | ||
1350 | if (GET_MODE (target) != ext_mode) | |
1351 | { | |
1352 | /* Don't use LHS paradoxical subreg if explicit truncation is needed | |
1353 | between the mode of the extraction (word_mode) and the target | |
1354 | mode. Instead, create a temporary and use convert_move to set | |
1355 | the target. */ | |
1356 | if (REG_P (target) | |
1357 | && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)) | |
1358 | { | |
1359 | target = gen_lowpart (ext_mode, target); | |
1360 | if (GET_MODE_PRECISION (ext_mode) | |
1361 | > GET_MODE_PRECISION (GET_MODE (spec_target))) | |
1362 | spec_target_subreg = target; | |
1363 | } | |
1364 | else | |
1365 | target = gen_reg_rtx (ext_mode); | |
1366 | } | |
1367 | ||
1368 | create_output_operand (&ops[0], target, ext_mode); | |
1369 | create_fixed_operand (&ops[1], op0); | |
1370 | create_integer_operand (&ops[2], bitsize); | |
1371 | create_integer_operand (&ops[3], bitnum); | |
fcdd52b7 | 1372 | if (maybe_expand_insn (extv->icode, 4, ops)) |
a20556e4 RS |
1373 | { |
1374 | target = ops[0].value; | |
1375 | if (target == spec_target) | |
1376 | return target; | |
1377 | if (target == spec_target_subreg) | |
1378 | return spec_target; | |
1379 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
1380 | } | |
1381 | return NULL_RTX; | |
1382 | } | |
1383 | ||
6d7db3c5 RS |
1384 | /* A subroutine of extract_bit_field, with the same arguments. |
1385 | If FALLBACK_P is true, fall back to extract_fixed_bit_field | |
1386 | if we can find no other means of implementing the operation. | |
1387 | if FALLBACK_P is false, return NULL instead. */ | |
1388 | ||
1389 | static rtx | |
1390 | extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
c6285bd7 | 1391 | unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, |
ef4bddc2 | 1392 | machine_mode mode, machine_mode tmode, |
6d7db3c5 | 1393 | bool fallback_p) |
44037a66 | 1394 | { |
b3694847 | 1395 | rtx op0 = str_rtx; |
ef4bddc2 RS |
1396 | machine_mode int_mode; |
1397 | machine_mode mode1; | |
44037a66 | 1398 | |
44037a66 TG |
1399 | if (tmode == VOIDmode) |
1400 | tmode = mode; | |
6ca6193b | 1401 | |
44037a66 TG |
1402 | while (GET_CODE (op0) == SUBREG) |
1403 | { | |
2c58f7dd | 1404 | bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |
44037a66 TG |
1405 | op0 = SUBREG_REG (op0); |
1406 | } | |
77295dec | 1407 | |
2c58f7dd | 1408 | /* If we have an out-of-bounds access to a register, just return an |
647eea9d | 1409 | uninitialized register of the required mode. This can occur if the |
2c58f7dd RS |
1410 | source code contains an out-of-bounds access to a small array. */ |
1411 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
1412 | return gen_reg_rtx (tmode); | |
1413 | ||
f8cfc6aa | 1414 | if (REG_P (op0) |
aac280fb DD |
1415 | && mode == GET_MODE (op0) |
1416 | && bitnum == 0 | |
0b69c29f | 1417 | && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) |
aac280fb | 1418 | { |
0b69c29f | 1419 | /* We're trying to extract a full register from itself. */ |
aac280fb DD |
1420 | return op0; |
1421 | } | |
1422 | ||
0890b981 AP |
1423 | /* See if we can get a better vector mode before extracting. */ |
1424 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
1425 | && !MEM_P (op0) | |
1426 | && GET_MODE_INNER (GET_MODE (op0)) != tmode) | |
1427 | { | |
ef4bddc2 | 1428 | machine_mode new_mode; |
0890b981 AP |
1429 | |
1430 | if (GET_MODE_CLASS (tmode) == MODE_FLOAT) | |
1431 | new_mode = MIN_MODE_VECTOR_FLOAT; | |
325217ed CF |
1432 | else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |
1433 | new_mode = MIN_MODE_VECTOR_FRACT; | |
1434 | else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) | |
1435 | new_mode = MIN_MODE_VECTOR_UFRACT; | |
1436 | else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) | |
1437 | new_mode = MIN_MODE_VECTOR_ACCUM; | |
1438 | else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) | |
1439 | new_mode = MIN_MODE_VECTOR_UACCUM; | |
0890b981 AP |
1440 | else |
1441 | new_mode = MIN_MODE_VECTOR_INT; | |
1442 | ||
1443 | for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode)) | |
b147c5b9 | 1444 | if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) |
0890b981 AP |
1445 | && targetm.vector_mode_supported_p (new_mode)) |
1446 | break; | |
1447 | if (new_mode != VOIDmode) | |
1448 | op0 = gen_lowpart (new_mode, op0); | |
1449 | } | |
1450 | ||
997404de JH |
1451 | /* Use vec_extract patterns for extracting parts of vectors whenever |
1452 | available. */ | |
1453 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
3c0cb5de | 1454 | && !MEM_P (op0) |
947131ba | 1455 | && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing |
b42271d6 JB |
1456 | && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) |
1457 | == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) | |
997404de | 1458 | { |
a5c7d693 | 1459 | struct expand_operand ops[3]; |
ef4bddc2 RS |
1460 | machine_mode outermode = GET_MODE (op0); |
1461 | machine_mode innermode = GET_MODE_INNER (outermode); | |
a5c7d693 | 1462 | enum insn_code icode = optab_handler (vec_extract_optab, outermode); |
b42271d6 | 1463 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); |
997404de | 1464 | |
a5c7d693 RS |
1465 | create_output_operand (&ops[0], target, innermode); |
1466 | create_input_operand (&ops[1], op0, outermode); | |
1467 | create_integer_operand (&ops[2], pos); | |
1468 | if (maybe_expand_insn (icode, 3, ops)) | |
997404de | 1469 | { |
a5c7d693 RS |
1470 | target = ops[0].value; |
1471 | if (GET_MODE (target) != mode) | |
1472 | return gen_lowpart (tmode, target); | |
1473 | return target; | |
997404de JH |
1474 | } |
1475 | } | |
1476 | ||
d006aa54 RH |
1477 | /* Make sure we are playing with integral modes. Pun with subregs |
1478 | if we aren't. */ | |
1479 | { | |
ef4bddc2 | 1480 | machine_mode imode = int_mode_for_mode (GET_MODE (op0)); |
d006aa54 RH |
1481 | if (imode != GET_MODE (op0)) |
1482 | { | |
a6d2976a | 1483 | if (MEM_P (op0)) |
e98fc6de | 1484 | op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0)); |
7d293b58 | 1485 | else if (imode != BLKmode) |
a6d2976a | 1486 | { |
a6d2976a | 1487 | op0 = gen_lowpart (imode, op0); |
360e3535 | 1488 | |
a6d2976a JDA |
1489 | /* If we got a SUBREG, force it into a register since we |
1490 | aren't going to be able to do another SUBREG on it. */ | |
1491 | if (GET_CODE (op0) == SUBREG) | |
1492 | op0 = force_reg (imode, op0); | |
1493 | } | |
7d293b58 JJ |
1494 | else if (REG_P (op0)) |
1495 | { | |
1496 | rtx reg, subreg; | |
1497 | imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)), | |
1498 | MODE_INT); | |
1499 | reg = gen_reg_rtx (imode); | |
1500 | subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg); | |
1501 | emit_move_insn (subreg, op0); | |
1502 | op0 = reg; | |
1503 | bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT; | |
1504 | } | |
1505 | else | |
1506 | { | |
e98fc6de RS |
1507 | HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0)); |
1508 | rtx mem = assign_stack_temp (GET_MODE (op0), size); | |
7d293b58 | 1509 | emit_move_insn (mem, op0); |
e98fc6de | 1510 | op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size); |
7d293b58 | 1511 | } |
d006aa54 RH |
1512 | } |
1513 | } | |
1514 | ||
6ca6193b JDA |
1515 | /* ??? We currently assume TARGET is at least as big as BITSIZE. |
1516 | If that's wrong, the solution is to test for it and set TARGET to 0 | |
1517 | if needed. */ | |
e98f90d3 | 1518 | |
f5d4f18c SL |
1519 | /* Get the mode of the field to use for atomic access or subreg |
1520 | conversion. */ | |
b8ab7fc8 RS |
1521 | mode1 = mode; |
1522 | if (SCALAR_INT_MODE_P (tmode)) | |
44037a66 | 1523 | { |
ef4bddc2 | 1524 | machine_mode try_mode = mode_for_size (bitsize, |
b8ab7fc8 RS |
1525 | GET_MODE_CLASS (tmode), 0); |
1526 | if (try_mode != BLKmode) | |
1527 | mode1 = try_mode; | |
1528 | } | |
1529 | gcc_assert (mode1 != BLKmode); | |
1530 | ||
1531 | /* Extraction of a full MODE1 value can be done with a subreg as long | |
1532 | as the least significant bit of the value is the least significant | |
1533 | bit of either OP0 or a word of OP0. */ | |
1534 | if (!MEM_P (op0) | |
1535 | && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) | |
1536 | && bitsize == GET_MODE_BITSIZE (mode1) | |
1537 | && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))) | |
1538 | { | |
1539 | rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), | |
1540 | bitnum / BITS_PER_UNIT); | |
1541 | if (sub) | |
1542 | return convert_extracted_bit_field (sub, mode, tmode, unsignedp); | |
1543 | } | |
1544 | ||
1545 | /* Extraction of a full MODE1 value can be done with a load as long as | |
1546 | the field is on a byte boundary and is sufficiently aligned. */ | |
00efe3ea | 1547 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1)) |
b8ab7fc8 RS |
1548 | { |
1549 | op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT); | |
1550 | return convert_extracted_bit_field (op0, mode, tmode, unsignedp); | |
44037a66 | 1551 | } |
b8ab7fc8 | 1552 | |
44037a66 | 1553 | /* Handle fields bigger than a word. */ |
c410d49e | 1554 | |
44037a66 TG |
1555 | if (bitsize > BITS_PER_WORD) |
1556 | { | |
1557 | /* Here we transfer the words of the field | |
1558 | in the order least significant first. | |
1559 | This is because the most significant word is the one which may | |
1560 | be less than full. */ | |
1561 | ||
0cd9e9ee | 1562 | unsigned int backwards = WORDS_BIG_ENDIAN; |
770ae6cc RK |
1563 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
1564 | unsigned int i; | |
f3f6fb16 | 1565 | rtx_insn *last; |
44037a66 | 1566 | |
02972eaf | 1567 | if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) |
44037a66 TG |
1568 | target = gen_reg_rtx (mode); |
1569 | ||
34ea783b | 1570 | /* Indicate for flow that the entire target reg is being set. */ |
c41c1387 | 1571 | emit_clobber (target); |
34ea783b | 1572 | |
5ef0b50d | 1573 | last = get_last_insn (); |
44037a66 TG |
1574 | for (i = 0; i < nwords; i++) |
1575 | { | |
1576 | /* If I is 0, use the low-order word in both field and target; | |
1577 | if I is 1, use the next to lowest word; and so on. */ | |
77295dec | 1578 | /* Word number in TARGET to use. */ |
770ae6cc | 1579 | unsigned int wordnum |
0cd9e9ee | 1580 | = (backwards |
770ae6cc RK |
1581 | ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 |
1582 | : i); | |
77295dec | 1583 | /* Offset from start of field in OP0. */ |
0cd9e9ee EB |
1584 | unsigned int bit_offset = (backwards |
1585 | ? MAX ((int) bitsize - ((int) i + 1) | |
1586 | * BITS_PER_WORD, | |
1587 | 0) | |
770ae6cc | 1588 | : (int) i * BITS_PER_WORD); |
44037a66 TG |
1589 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
1590 | rtx result_part | |
5ef0b50d EB |
1591 | = extract_bit_field_1 (op0, MIN (BITS_PER_WORD, |
1592 | bitsize - i * BITS_PER_WORD), | |
c6285bd7 | 1593 | bitnum + bit_offset, 1, target_part, |
5ef0b50d | 1594 | mode, word_mode, fallback_p); |
44037a66 | 1595 | |
5b0264cb | 1596 | gcc_assert (target_part); |
5ef0b50d EB |
1597 | if (!result_part) |
1598 | { | |
1599 | delete_insns_since (last); | |
1600 | return NULL; | |
1601 | } | |
44037a66 TG |
1602 | |
1603 | if (result_part != target_part) | |
1604 | emit_move_insn (target_part, result_part); | |
1605 | } | |
1606 | ||
5f57dff0 | 1607 | if (unsignedp) |
77295dec DE |
1608 | { |
1609 | /* Unless we've filled TARGET, the upper regs in a multi-reg value | |
1610 | need to be zero'd out. */ | |
1611 | if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) | |
1612 | { | |
770ae6cc | 1613 | unsigned int i, total_words; |
77295dec DE |
1614 | |
1615 | total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; | |
1616 | for (i = nwords; i < total_words; i++) | |
04050c69 RK |
1617 | emit_move_insn |
1618 | (operand_subword (target, | |
0cd9e9ee | 1619 | backwards ? total_words - i - 1 : i, |
04050c69 RK |
1620 | 1, VOIDmode), |
1621 | const0_rtx); | |
77295dec DE |
1622 | } |
1623 | return target; | |
1624 | } | |
1625 | ||
5f57dff0 JW |
1626 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
1627 | target = expand_shift (LSHIFT_EXPR, mode, target, | |
eb6c3df1 | 1628 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
5f57dff0 | 1629 | return expand_shift (RSHIFT_EXPR, mode, target, |
eb6c3df1 | 1630 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
44037a66 | 1631 | } |
c410d49e | 1632 | |
b8ab7fc8 RS |
1633 | /* If OP0 is a multi-word register, narrow it to the affected word. |
1634 | If the region spans two words, defer to extract_split_bit_field. */ | |
1635 | if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
44037a66 | 1636 | { |
b8ab7fc8 RS |
1637 | op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), |
1638 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); | |
1639 | bitnum %= BITS_PER_WORD; | |
1640 | if (bitnum + bitsize > BITS_PER_WORD) | |
470032d7 | 1641 | { |
b8ab7fc8 RS |
1642 | if (!fallback_p) |
1643 | return NULL_RTX; | |
1644 | target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp); | |
1645 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
470032d7 | 1646 | } |
44037a66 | 1647 | } |
44037a66 | 1648 | |
b8ab7fc8 RS |
1649 | /* From here on we know the desired field is smaller than a word. |
1650 | If OP0 is a register, it too fits within a word. */ | |
fcdd52b7 RS |
1651 | enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; |
1652 | extraction_insn extv; | |
1653 | if (!MEM_P (op0) | |
c0a8a3e6 RS |
1654 | /* ??? We could limit the structure size to the part of OP0 that |
1655 | contains the field, with appropriate checks for endianness | |
1656 | and TRULY_NOOP_TRUNCATION. */ | |
1657 | && get_best_reg_extraction_insn (&extv, pattern, | |
1658 | GET_MODE_BITSIZE (GET_MODE (op0)), | |
fcdd52b7 | 1659 | tmode)) |
44037a66 | 1660 | { |
fcdd52b7 | 1661 | rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum, |
a20556e4 | 1662 | unsignedp, target, mode, |
fcdd52b7 | 1663 | tmode); |
a20556e4 RS |
1664 | if (result) |
1665 | return result; | |
44037a66 | 1666 | } |
f76b9db2 | 1667 | |
6d7db3c5 RS |
1668 | /* If OP0 is a memory, try copying it to a register and seeing if a |
1669 | cheap register alternative is available. */ | |
fcdd52b7 | 1670 | if (MEM_P (op0)) |
6d7db3c5 | 1671 | { |
f5d4f18c SL |
1672 | if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, |
1673 | tmode)) | |
17a73ba0 | 1674 | { |
fcdd52b7 RS |
1675 | rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, |
1676 | bitnum, unsignedp, | |
1677 | target, mode, | |
1678 | tmode); | |
17a73ba0 RS |
1679 | if (result) |
1680 | return result; | |
1681 | } | |
1682 | ||
f3f6fb16 | 1683 | rtx_insn *last = get_last_insn (); |
f76b9db2 | 1684 | |
fcdd52b7 RS |
1685 | /* Try loading part of OP0 into a register and extracting the |
1686 | bitfield from that. */ | |
1687 | unsigned HOST_WIDE_INT bitpos; | |
1688 | rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, | |
1689 | 0, 0, tmode, &bitpos); | |
1690 | if (xop0) | |
6d7db3c5 | 1691 | { |
fcdd52b7 RS |
1692 | xop0 = copy_to_reg (xop0); |
1693 | rtx result = extract_bit_field_1 (xop0, bitsize, bitpos, | |
c6285bd7 | 1694 | unsignedp, target, |
6d7db3c5 | 1695 | mode, tmode, false); |
fcdd52b7 RS |
1696 | if (result) |
1697 | return result; | |
1698 | delete_insns_since (last); | |
c410d49e | 1699 | } |
44037a66 | 1700 | } |
562fc702 | 1701 | |
6d7db3c5 RS |
1702 | if (!fallback_p) |
1703 | return NULL; | |
1704 | ||
b8ab7fc8 RS |
1705 | /* Find a correspondingly-sized integer field, so we can apply |
1706 | shifts and masks to it. */ | |
1707 | int_mode = int_mode_for_mode (tmode); | |
1708 | if (int_mode == BLKmode) | |
1709 | int_mode = int_mode_for_mode (mode); | |
1710 | /* Should probably push op0 out to memory and then do a load. */ | |
1711 | gcc_assert (int_mode != BLKmode); | |
1712 | ||
1713 | target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, | |
c6285bd7 | 1714 | target, unsignedp); |
6d7db3c5 RS |
1715 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); |
1716 | } | |
1717 | ||
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
		   machine_mode mode, machine_mode tmode)
{
  machine_mode mode1;

  /* Handle -fstrict-volatile-bitfields in the cases where it applies.
     Pick the access mode to test against: prefer the mode of the
     structure itself, then the target's mode, then TMODE.  */
  if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
    mode1 = GET_MODE (str_rtx);
  else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
    mode1 = GET_MODE (target);
  else
    mode1 = tmode;

  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
    {
      /* The strict-volatile rules force the access to be done in MODE1
	 exactly, so bypass the general expander below.  */
      rtx result;

      /* Extraction of a full MODE1 value can be done with a load as long as
	 the field is on a byte boundary and is sufficiently aligned.  */
      if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
	result = adjust_bitfield_address (str_rtx, mode1,
					  bitnum / BITS_PER_UNIT);
      else
	{
	  /* Otherwise narrow the memory reference to a MODE1-sized chunk
	     containing the field and extract with shifts and masks.  */
	  str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
					  &bitnum);
	  result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
					      target, unsignedp);
	}

      return convert_extracted_bit_field (result, mode, tmode, unsignedp);
    }

  /* General case: allow every strategy (fallback_p == true).  */
  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
			      target, mode, tmode, true);
}
1772 | \f | |
/* Use shifts and boolean operations to extract a field of BITSIZE bits
   from bit BITNUM of OP0.

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (machine_mode tmode, rtx op0,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum, rtx target,
			 int unsignedp)
{
  if (MEM_P (op0))
    {
      /* Choose the widest suitable integer mode for the memory access,
	 honoring the MEM's alignment and volatility.  */
      machine_mode mode
	= get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
			 MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);

      /* Narrow the MEM to that mode; BITNUM is updated to be relative
	 to the new, narrower reference.  */
      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  /* OP0 is now a register or a suitably narrowed MEM; do the actual
     shift-and-mask extraction in OP0's own mode.  */
  return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
				    target, unsignedp);
}
1804 | ||
/* Helper function for extract_fixed_bit_field, extracts
   the bit field always using the MODE of OP0.  */

static rtx
extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
			   unsigned HOST_WIDE_INT bitsize,
			   unsigned HOST_WIDE_INT bitnum, rtx target,
			   int unsignedp)
{
  machine_mode mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as extract equivalent of f5 from
     gcc.dg/pr48335-2.c.  */

  if (BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitnum)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  /* Maybe propagate the target for the shift.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  /* TARGET is only a valid shift destination if its mode matches
	     the mode we are shifting in.  */
	  if (tmode != mode)
	    subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  /* TARGET cannot be used if it is in the wrong mode.  */
  if (mode != tmode)
    target = 0;

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
    {
      /* Left-shift so the field's msb lands in the mode's msb; the
	 arithmetic right shift below then sign-extends it.  */
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
44037a66 TG |
1883 | |
1884 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value | |
088c5368 | 1885 | VALUE << BITPOS. */ |
44037a66 TG |
1886 | |
1887 | static rtx | |
ef4bddc2 | 1888 | lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, |
088c5368 | 1889 | int bitpos) |
44037a66 | 1890 | { |
807e902e | 1891 | return immed_wide_int_const (wi::lshift (value, bitpos), mode); |
44037a66 TG |
1892 | } |
1893 | \f | |
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */

static rtx
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
{
  unsigned int unit;
  unsigned int bitsdone = 0;
  rtx result = NULL_RTX;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* Loop extracting at most UNIT-aligned, word-sized chunks until the
     whole field has been gathered.  */
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_PER_UNIT.
	 (NOTE(review): presumably this stale comment meant
	 BYTES_BIG_ENDIAN, which is what the shift logic below tests.)
	 OFFSET is in UNITs, and UNIT is in bits.  Extract unsigned so the
	 parts can later be combined with IOR.  */
      part = extract_fixed_bit_field (word_mode, word, thissize,
				      offset * unit + thispos, 0, 1);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsize - bitsdone, 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsdone - thissize, 0, 1);
	}

      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
1995 | \f | |
18b526e8 RS |
/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
   MODE, fill the upper bits with zeros.  Fail if the layout of either
   mode is unknown (as for CC modes) or if the extraction would involve
   unprofitable mode punning.  Return the value on success, otherwise
   return null.

   This is different from gen_lowpart* in these respects:

     - the returned value must always be considered an rvalue

     - when MODE is wider than SRC_MODE, the extraction involves
       a zero extension

     - when MODE is smaller than SRC_MODE, the extraction involves
       a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).

   In other words, this routine performs a computation, whereas the
   gen_lowpart* routines are conceptually lvalue or rvalue subreg
   operations.  */

rtx
extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
{
  machine_mode int_mode, src_int_mode;

  /* Trivial case: no conversion needed.  */
  if (mode == src_mode)
    return src;

  if (CONSTANT_P (src))
    {
      /* simplify_gen_subreg can't be used here, as if simplify_subreg
	 fails, it will happily create (subreg (symbol_ref)) or similar
	 invalid SUBREGs.  */
      unsigned int byte = subreg_lowpart_offset (mode, src_mode);
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
      if (ret)
	return ret;

      if (GET_MODE (src) == VOIDmode
	  || !validate_subreg (mode, src_mode, src, byte))
	return NULL_RTX;

      /* Force the constant into a register so a SUBREG of it is valid.  */
      src = force_reg (GET_MODE (src), src);
      return gen_rtx_SUBREG (mode, src, byte);
    }

  /* CC modes have no defined bit layout; give up.  */
  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
    return NULL_RTX;

  /* Same-sized, tieable modes: try a direct lowpart first.  */
  if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
      && MODES_TIEABLE_P (mode, src_mode))
    {
      rtx x = gen_lowpart_common (mode, src);
      if (x)
	return x;
    }

  /* Otherwise go through same-sized integer modes:
     SRC_MODE -> SRC_INT_MODE -> INT_MODE -> MODE.  */
  src_int_mode = int_mode_for_mode (src_mode);
  int_mode = int_mode_for_mode (mode);
  if (src_int_mode == BLKmode || int_mode == BLKmode)
    return NULL_RTX;

  /* Reject the punning if either register transfer is unprofitable.  */
  if (!MODES_TIEABLE_P (src_int_mode, src_mode))
    return NULL_RTX;
  if (!MODES_TIEABLE_P (int_mode, mode))
    return NULL_RTX;

  src = gen_lowpart (src_int_mode, src);
  /* Zero-extend (or truncate) between the integer modes.  */
  src = convert_modes (int_mode, src_int_mode, src, true);
  src = gen_lowpart (mode, src);
  return src;
}
2069 | \f | |
44037a66 TG |
2070 | /* Add INC into TARGET. */ |
2071 | ||
2072 | void | |
502b8322 | 2073 | expand_inc (rtx target, rtx inc) |
44037a66 TG |
2074 | { |
2075 | rtx value = expand_binop (GET_MODE (target), add_optab, | |
2076 | target, inc, | |
2077 | target, 0, OPTAB_LIB_WIDEN); | |
2078 | if (value != target) | |
2079 | emit_move_insn (target, value); | |
2080 | } | |
2081 | ||
2082 | /* Subtract DEC from TARGET. */ | |
2083 | ||
2084 | void | |
502b8322 | 2085 | expand_dec (rtx target, rtx dec) |
44037a66 TG |
2086 | { |
2087 | rtx value = expand_binop (GET_MODE (target), sub_optab, | |
2088 | target, dec, | |
2089 | target, 0, OPTAB_LIB_WIDEN); | |
2090 | if (value != target) | |
2091 | emit_move_insn (target, value); | |
2092 | } | |
2093 | \f | |
2094 | /* Output a shift instruction for expression code CODE, | |
2095 | with SHIFTED being the rtx for the value to shift, | |
86529a49 | 2096 | and AMOUNT the rtx for the amount to shift by. |
44037a66 TG |
2097 | Store the result in the rtx TARGET, if that is convenient. |
2098 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2099 | Return the rtx for where the value is. */ | |
2100 | ||
86529a49 | 2101 | static rtx |
ef4bddc2 | 2102 | expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, |
86529a49 | 2103 | rtx amount, rtx target, int unsignedp) |
44037a66 | 2104 | { |
b3694847 SS |
2105 | rtx op1, temp = 0; |
2106 | int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); | |
2107 | int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); | |
71d46ca5 MM |
2108 | optab lshift_optab = ashl_optab; |
2109 | optab rshift_arith_optab = ashr_optab; | |
2110 | optab rshift_uns_optab = lshr_optab; | |
2111 | optab lrotate_optab = rotl_optab; | |
2112 | optab rrotate_optab = rotr_optab; | |
ef4bddc2 RS |
2113 | machine_mode op1_mode; |
2114 | machine_mode scalar_mode = mode; | |
d858f359 | 2115 | int attempt; |
f40751dd | 2116 | bool speed = optimize_insn_for_speed_p (); |
44037a66 | 2117 | |
a4ee446d JJ |
2118 | if (VECTOR_MODE_P (mode)) |
2119 | scalar_mode = GET_MODE_INNER (mode); | |
86529a49 | 2120 | op1 = amount; |
71d46ca5 MM |
2121 | op1_mode = GET_MODE (op1); |
2122 | ||
2123 | /* Determine whether the shift/rotate amount is a vector, or scalar. If the | |
2124 | shift amount is a vector, use the vector/vector shift patterns. */ | |
2125 | if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) | |
2126 | { | |
2127 | lshift_optab = vashl_optab; | |
2128 | rshift_arith_optab = vashr_optab; | |
2129 | rshift_uns_optab = vlshr_optab; | |
2130 | lrotate_optab = vrotl_optab; | |
2131 | rrotate_optab = vrotr_optab; | |
2132 | } | |
2133 | ||
44037a66 TG |
2134 | /* Previously detected shift-counts computed by NEGATE_EXPR |
2135 | and shifted in the other direction; but that does not work | |
2136 | on all machines. */ | |
2137 | ||
166cdf4a RH |
2138 | if (SHIFT_COUNT_TRUNCATED) |
2139 | { | |
481683e1 | 2140 | if (CONST_INT_P (op1) |
0fb7aeda | 2141 | && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= |
a4ee446d | 2142 | (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode))) |
0fb7aeda | 2143 | op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) |
a4ee446d | 2144 | % GET_MODE_BITSIZE (scalar_mode)); |
166cdf4a | 2145 | else if (GET_CODE (op1) == SUBREG |
c1cb09ad | 2146 | && subreg_lowpart_p (op1) |
7afe2801 DM |
2147 | && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1))) |
2148 | && SCALAR_INT_MODE_P (GET_MODE (op1))) | |
166cdf4a RH |
2149 | op1 = SUBREG_REG (op1); |
2150 | } | |
2ab0a5c4 | 2151 | |
75776c6d JJ |
2152 | /* Canonicalize rotates by constant amount. If op1 is bitsize / 2, |
2153 | prefer left rotation, if op1 is from bitsize / 2 + 1 to | |
2154 | bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1 | |
2155 | amount instead. */ | |
2156 | if (rotate | |
2157 | && CONST_INT_P (op1) | |
a4ee446d JJ |
2158 | && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left, |
2159 | GET_MODE_BITSIZE (scalar_mode) - 1)) | |
75776c6d | 2160 | { |
a4ee446d | 2161 | op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1)); |
75776c6d JJ |
2162 | left = !left; |
2163 | code = left ? LROTATE_EXPR : RROTATE_EXPR; | |
2164 | } | |
2165 | ||
44037a66 TG |
2166 | if (op1 == const0_rtx) |
2167 | return shifted; | |
2168 | ||
15bad393 RS |
2169 | /* Check whether its cheaper to implement a left shift by a constant |
2170 | bit count by a sequence of additions. */ | |
2171 | if (code == LSHIFT_EXPR | |
481683e1 | 2172 | && CONST_INT_P (op1) |
15bad393 | 2173 | && INTVAL (op1) > 0 |
a4ee446d | 2174 | && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode) |
cb2eb96f | 2175 | && INTVAL (op1) < MAX_BITS_PER_WORD |
5322d07e NF |
2176 | && (shift_cost (speed, mode, INTVAL (op1)) |
2177 | > INTVAL (op1) * add_cost (speed, mode)) | |
2178 | && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) | |
15bad393 RS |
2179 | { |
2180 | int i; | |
2181 | for (i = 0; i < INTVAL (op1); i++) | |
2182 | { | |
2183 | temp = force_reg (mode, shifted); | |
2184 | shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, | |
2185 | unsignedp, OPTAB_LIB_WIDEN); | |
2186 | } | |
2187 | return shifted; | |
2188 | } | |
2189 | ||
d858f359 | 2190 | for (attempt = 0; temp == 0 && attempt < 3; attempt++) |
44037a66 TG |
2191 | { |
2192 | enum optab_methods methods; | |
2193 | ||
d858f359 | 2194 | if (attempt == 0) |
44037a66 | 2195 | methods = OPTAB_DIRECT; |
d858f359 | 2196 | else if (attempt == 1) |
44037a66 TG |
2197 | methods = OPTAB_WIDEN; |
2198 | else | |
2199 | methods = OPTAB_LIB_WIDEN; | |
2200 | ||
2201 | if (rotate) | |
2202 | { | |
2203 | /* Widening does not work for rotation. */ | |
2204 | if (methods == OPTAB_WIDEN) | |
2205 | continue; | |
2206 | else if (methods == OPTAB_LIB_WIDEN) | |
cbec710e | 2207 | { |
39e71615 | 2208 | /* If we have been unable to open-code this by a rotation, |
cbec710e | 2209 | do it as the IOR of two shifts. I.e., to rotate A |
ae6fa899 JJ |
2210 | by N bits, compute |
2211 | (A << N) | ((unsigned) A >> ((-N) & (C - 1))) | |
cbec710e RK |
2212 | where C is the bitsize of A. |
2213 | ||
2214 | It is theoretically possible that the target machine might | |
2215 | not be able to perform either shift and hence we would | |
2216 | be making two libcalls rather than just the one for the | |
2217 | shift (similarly if IOR could not be done). We will allow | |
2218 | this extremely unlikely lossage to avoid complicating the | |
2219 | code below. */ | |
2220 | ||
39e71615 | 2221 | rtx subtarget = target == shifted ? 0 : target; |
86529a49 | 2222 | rtx new_amount, other_amount; |
39e71615 | 2223 | rtx temp1; |
86529a49 RG |
2224 | |
2225 | new_amount = op1; | |
ae6fa899 JJ |
2226 | if (op1 == const0_rtx) |
2227 | return shifted; | |
2228 | else if (CONST_INT_P (op1)) | |
a4ee446d | 2229 | other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode) |
5c049507 RG |
2230 | - INTVAL (op1)); |
2231 | else | |
ae6fa899 JJ |
2232 | { |
2233 | other_amount | |
2234 | = simplify_gen_unary (NEG, GET_MODE (op1), | |
2235 | op1, GET_MODE (op1)); | |
a4ee446d | 2236 | HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1; |
ae6fa899 | 2237 | other_amount |
69a59f0f RS |
2238 | = simplify_gen_binary (AND, GET_MODE (op1), other_amount, |
2239 | gen_int_mode (mask, GET_MODE (op1))); | |
ae6fa899 | 2240 | } |
39e71615 RK |
2241 | |
2242 | shifted = force_reg (mode, shifted); | |
2243 | ||
86529a49 RG |
2244 | temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR, |
2245 | mode, shifted, new_amount, 0, 1); | |
2246 | temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR, | |
2247 | mode, shifted, other_amount, | |
2248 | subtarget, 1); | |
39e71615 RK |
2249 | return expand_binop (mode, ior_optab, temp, temp1, target, |
2250 | unsignedp, methods); | |
cbec710e | 2251 | } |
44037a66 TG |
2252 | |
2253 | temp = expand_binop (mode, | |
71d46ca5 | 2254 | left ? lrotate_optab : rrotate_optab, |
44037a66 TG |
2255 | shifted, op1, target, unsignedp, methods); |
2256 | } | |
2257 | else if (unsignedp) | |
a34958c9 | 2258 | temp = expand_binop (mode, |
71d46ca5 | 2259 | left ? lshift_optab : rshift_uns_optab, |
a34958c9 | 2260 | shifted, op1, target, unsignedp, methods); |
44037a66 TG |
2261 | |
2262 | /* Do arithmetic shifts. | |
2263 | Also, if we are going to widen the operand, we can just as well | |
2264 | use an arithmetic right-shift instead of a logical one. */ | |
2265 | if (temp == 0 && ! rotate | |
2266 | && (! unsignedp || (! left && methods == OPTAB_WIDEN))) | |
2267 | { | |
2268 | enum optab_methods methods1 = methods; | |
2269 | ||
2270 | /* If trying to widen a log shift to an arithmetic shift, | |
2271 | don't accept an arithmetic shift of the same size. */ | |
2272 | if (unsignedp) | |
2273 | methods1 = OPTAB_MUST_WIDEN; | |
2274 | ||
2275 | /* Arithmetic shift */ | |
2276 | ||
2277 | temp = expand_binop (mode, | |
71d46ca5 | 2278 | left ? lshift_optab : rshift_arith_optab, |
44037a66 TG |
2279 | shifted, op1, target, unsignedp, methods1); |
2280 | } | |
2281 | ||
711a5e64 | 2282 | /* We used to try extzv here for logical right shifts, but that was |
c410d49e | 2283 | only useful for one machine, the VAX, and caused poor code |
711a5e64 RK |
2284 | generation there for lshrdi3, so the code was deleted and a |
2285 | define_expand for lshrsi3 was added to vax.md. */ | |
44037a66 TG |
2286 | } |
2287 | ||
5b0264cb | 2288 | gcc_assert (temp); |
44037a66 TG |
2289 | return temp; |
2290 | } | |
eb6c3df1 RG |
2291 | |
2292 | /* Output a shift instruction for expression code CODE, | |
2293 | with SHIFTED being the rtx for the value to shift, | |
2294 | and AMOUNT the amount to shift by. | |
2295 | Store the result in the rtx TARGET, if that is convenient. | |
2296 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2297 | Return the rtx for where the value is. */ | |
2298 | ||
2299 | rtx | |
ef4bddc2 | 2300 | expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |
eb6c3df1 RG |
2301 | int amount, rtx target, int unsignedp) |
2302 | { | |
86529a49 RG |
2303 | return expand_shift_1 (code, mode, |
2304 | shifted, GEN_INT (amount), target, unsignedp); | |
2305 | } | |
2306 | ||
2307 | /* Output a shift instruction for expression code CODE, | |
2308 | with SHIFTED being the rtx for the value to shift, | |
2309 | and AMOUNT the tree for the amount to shift by. | |
2310 | Store the result in the rtx TARGET, if that is convenient. | |
2311 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2312 | Return the rtx for where the value is. */ | |
2313 | ||
2314 | rtx | |
ef4bddc2 | 2315 | expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, |
86529a49 RG |
2316 | tree amount, rtx target, int unsignedp) |
2317 | { | |
2318 | return expand_shift_1 (code, mode, | |
2319 | shifted, expand_normal (amount), target, unsignedp); | |
eb6c3df1 | 2320 | } |
86529a49 | 2321 | |
44037a66 | 2322 | \f |
8efc8980 RS |
2323 | /* Indicates the type of fixup needed after a constant multiplication. |
2324 | BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that | |
2325 | the result should be negated, and ADD_VARIANT means that the | |
2326 | multiplicand should be added to the result. */ | |
2327 | enum mult_variant {basic_variant, negate_variant, add_variant}; | |
2328 | ||
41c64ac0 | 2329 | static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, |
ef4bddc2 RS |
2330 | const struct mult_cost *, machine_mode mode); |
2331 | static bool choose_mult_variant (machine_mode, HOST_WIDE_INT, | |
f258e38b | 2332 | struct algorithm *, enum mult_variant *, int); |
ef4bddc2 | 2333 | static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, |
8efc8980 | 2334 | const struct algorithm *, enum mult_variant); |
502b8322 | 2335 | static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); |
ef4bddc2 RS |
2336 | static rtx extract_high_half (machine_mode, rtx); |
2337 | static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int); | |
2338 | static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx, | |
8efc8980 | 2339 | int, int); |
44037a66 | 2340 | /* Compute and return the best algorithm for multiplying by T. |
7963ac37 RK |
2341 | The algorithm must cost less than cost_limit |
2342 | If retval.cost >= COST_LIMIT, no algorithm was found and all | |
41c64ac0 RS |
2343 | other field of the returned struct are undefined. |
2344 | MODE is the machine mode of the multiplication. */ | |
44037a66 | 2345 | |
819126a6 | 2346 | static void |
502b8322 | 2347 | synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |
ef4bddc2 | 2348 | const struct mult_cost *cost_limit, machine_mode mode) |
44037a66 | 2349 | { |
b2fb324c | 2350 | int m; |
52786026 | 2351 | struct algorithm *alg_in, *best_alg; |
26276705 RS |
2352 | struct mult_cost best_cost; |
2353 | struct mult_cost new_limit; | |
2354 | int op_cost, op_latency; | |
ef268d34 | 2355 | unsigned HOST_WIDE_INT orig_t = t; |
b2fb324c | 2356 | unsigned HOST_WIDE_INT q; |
84ddb681 | 2357 | int maxm, hash_index; |
7b13ee6b KH |
2358 | bool cache_hit = false; |
2359 | enum alg_code cache_alg = alg_zero; | |
f40751dd | 2360 | bool speed = optimize_insn_for_speed_p (); |
ef4bddc2 | 2361 | machine_mode imode; |
5322d07e | 2362 | struct alg_hash_entry *entry_ptr; |
44037a66 | 2363 | |
7963ac37 RK |
2364 | /* Indicate that no algorithm is yet found. If no algorithm |
2365 | is found, this value will be returned and indicate failure. */ | |
26276705 | 2366 | alg_out->cost.cost = cost_limit->cost + 1; |
3ab0f290 | 2367 | alg_out->cost.latency = cost_limit->latency + 1; |
44037a66 | 2368 | |
26276705 RS |
2369 | if (cost_limit->cost < 0 |
2370 | || (cost_limit->cost == 0 && cost_limit->latency <= 0)) | |
819126a6 | 2371 | return; |
44037a66 | 2372 | |
84ddb681 RH |
2373 | /* Be prepared for vector modes. */ |
2374 | imode = GET_MODE_INNER (mode); | |
2375 | if (imode == VOIDmode) | |
2376 | imode = mode; | |
2377 | ||
2378 | maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); | |
2379 | ||
0792ab19 | 2380 | /* Restrict the bits of "t" to the multiplication's mode. */ |
84ddb681 | 2381 | t &= GET_MODE_MASK (imode); |
0792ab19 | 2382 | |
b385aeda RK |
2383 | /* t == 1 can be done in zero cost. */ |
2384 | if (t == 1) | |
b2fb324c | 2385 | { |
819126a6 | 2386 | alg_out->ops = 1; |
26276705 RS |
2387 | alg_out->cost.cost = 0; |
2388 | alg_out->cost.latency = 0; | |
819126a6 RK |
2389 | alg_out->op[0] = alg_m; |
2390 | return; | |
b2fb324c RK |
2391 | } |
2392 | ||
b385aeda RK |
2393 | /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |
2394 | fail now. */ | |
819126a6 | 2395 | if (t == 0) |
b385aeda | 2396 | { |
5322d07e | 2397 | if (MULT_COST_LESS (cost_limit, zero_cost (speed))) |
819126a6 | 2398 | return; |
b385aeda RK |
2399 | else |
2400 | { | |
819126a6 | 2401 | alg_out->ops = 1; |
5322d07e NF |
2402 | alg_out->cost.cost = zero_cost (speed); |
2403 | alg_out->cost.latency = zero_cost (speed); | |
819126a6 RK |
2404 | alg_out->op[0] = alg_zero; |
2405 | return; | |
b385aeda RK |
2406 | } |
2407 | } | |
2408 | ||
52786026 RK |
2409 | /* We'll be needing a couple extra algorithm structures now. */ |
2410 | ||
1b4572a8 KG |
2411 | alg_in = XALLOCA (struct algorithm); |
2412 | best_alg = XALLOCA (struct algorithm); | |
26276705 | 2413 | best_cost = *cost_limit; |
52786026 | 2414 | |
7b13ee6b | 2415 | /* Compute the hash index. */ |
f40751dd | 2416 | hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |
7b13ee6b KH |
2417 | |
2418 | /* See if we already know what to do for T. */ | |
5322d07e NF |
2419 | entry_ptr = alg_hash_entry_ptr (hash_index); |
2420 | if (entry_ptr->t == t | |
2421 | && entry_ptr->mode == mode | |
2422 | && entry_ptr->mode == mode | |
2423 | && entry_ptr->speed == speed | |
2424 | && entry_ptr->alg != alg_unknown) | |
7b13ee6b | 2425 | { |
5322d07e | 2426 | cache_alg = entry_ptr->alg; |
0178027c KH |
2427 | |
2428 | if (cache_alg == alg_impossible) | |
7b13ee6b | 2429 | { |
0178027c | 2430 | /* The cache tells us that it's impossible to synthesize |
5322d07e NF |
2431 | multiplication by T within entry_ptr->cost. */ |
2432 | if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) | |
0178027c KH |
2433 | /* COST_LIMIT is at least as restrictive as the one |
2434 | recorded in the hash table, in which case we have no | |
2435 | hope of synthesizing a multiplication. Just | |
2436 | return. */ | |
2437 | return; | |
2438 | ||
2439 | /* If we get here, COST_LIMIT is less restrictive than the | |
2440 | one recorded in the hash table, so we may be able to | |
2441 | synthesize a multiplication. Proceed as if we didn't | |
2442 | have the cache entry. */ | |
2443 | } | |
2444 | else | |
2445 | { | |
5322d07e | 2446 | if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) |
0178027c KH |
2447 | /* The cached algorithm shows that this multiplication |
2448 | requires more cost than COST_LIMIT. Just return. This | |
2449 | way, we don't clobber this cache entry with | |
2450 | alg_impossible but retain useful information. */ | |
2451 | return; | |
7b13ee6b | 2452 | |
0178027c KH |
2453 | cache_hit = true; |
2454 | ||
2455 | switch (cache_alg) | |
2456 | { | |
2457 | case alg_shift: | |
2458 | goto do_alg_shift; | |
7b13ee6b | 2459 | |
0178027c KH |
2460 | case alg_add_t_m2: |
2461 | case alg_sub_t_m2: | |
2462 | goto do_alg_addsub_t_m2; | |
7b13ee6b | 2463 | |
0178027c KH |
2464 | case alg_add_factor: |
2465 | case alg_sub_factor: | |
2466 | goto do_alg_addsub_factor; | |
7b13ee6b | 2467 | |
0178027c KH |
2468 | case alg_add_t2_m: |
2469 | goto do_alg_add_t2_m; | |
7b13ee6b | 2470 | |
0178027c KH |
2471 | case alg_sub_t2_m: |
2472 | goto do_alg_sub_t2_m; | |
2473 | ||
2474 | default: | |
2475 | gcc_unreachable (); | |
2476 | } | |
7b13ee6b KH |
2477 | } |
2478 | } | |
2479 | ||
b385aeda RK |
2480 | /* If we have a group of zero bits at the low-order part of T, try |
2481 | multiplying by the remaining bits and then doing a shift. */ | |
2482 | ||
b2fb324c | 2483 | if ((t & 1) == 0) |
44037a66 | 2484 | { |
7b13ee6b | 2485 | do_alg_shift: |
b2fb324c | 2486 | m = floor_log2 (t & -t); /* m = number of low zero bits */ |
0792ab19 | 2487 | if (m < maxm) |
44037a66 | 2488 | { |
02a65aef | 2489 | q = t >> m; |
15bad393 RS |
2490 | /* The function expand_shift will choose between a shift and |
2491 | a sequence of additions, so the observed cost is given as | |
5322d07e NF |
2492 | MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */ |
2493 | op_cost = m * add_cost (speed, mode); | |
2494 | if (shift_cost (speed, mode, m) < op_cost) | |
2495 | op_cost = shift_cost (speed, mode, m); | |
26276705 RS |
2496 | new_limit.cost = best_cost.cost - op_cost; |
2497 | new_limit.latency = best_cost.latency - op_cost; | |
2498 | synth_mult (alg_in, q, &new_limit, mode); | |
2499 | ||
2500 | alg_in->cost.cost += op_cost; | |
2501 | alg_in->cost.latency += op_cost; | |
2502 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
02a65aef R |
2503 | { |
2504 | struct algorithm *x; | |
26276705 | 2505 | best_cost = alg_in->cost; |
02a65aef R |
2506 | x = alg_in, alg_in = best_alg, best_alg = x; |
2507 | best_alg->log[best_alg->ops] = m; | |
2508 | best_alg->op[best_alg->ops] = alg_shift; | |
02a65aef | 2509 | } |
ddc2690a KH |
2510 | |
2511 | /* See if treating ORIG_T as a signed number yields a better | |
2512 | sequence. Try this sequence only for a negative ORIG_T | |
2513 | as it would be useless for a non-negative ORIG_T. */ | |
2514 | if ((HOST_WIDE_INT) orig_t < 0) | |
2515 | { | |
2516 | /* Shift ORIG_T as follows because a right shift of a | |
2517 | negative-valued signed type is implementation | |
2518 | defined. */ | |
2519 | q = ~(~orig_t >> m); | |
2520 | /* The function expand_shift will choose between a shift | |
2521 | and a sequence of additions, so the observed cost is | |
5322d07e NF |
2522 | given as MIN (m * add_cost(speed, mode), |
2523 | shift_cost(speed, mode, m)). */ | |
2524 | op_cost = m * add_cost (speed, mode); | |
2525 | if (shift_cost (speed, mode, m) < op_cost) | |
2526 | op_cost = shift_cost (speed, mode, m); | |
ddc2690a KH |
2527 | new_limit.cost = best_cost.cost - op_cost; |
2528 | new_limit.latency = best_cost.latency - op_cost; | |
2529 | synth_mult (alg_in, q, &new_limit, mode); | |
2530 | ||
2531 | alg_in->cost.cost += op_cost; | |
2532 | alg_in->cost.latency += op_cost; | |
2533 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
2534 | { | |
2535 | struct algorithm *x; | |
2536 | best_cost = alg_in->cost; | |
2537 | x = alg_in, alg_in = best_alg, best_alg = x; | |
2538 | best_alg->log[best_alg->ops] = m; | |
2539 | best_alg->op[best_alg->ops] = alg_shift; | |
2540 | } | |
2541 | } | |
819126a6 | 2542 | } |
7b13ee6b KH |
2543 | if (cache_hit) |
2544 | goto done; | |
819126a6 RK |
2545 | } |
2546 | ||
2547 | /* If we have an odd number, add or subtract one. */ | |
2548 | if ((t & 1) != 0) | |
2549 | { | |
2550 | unsigned HOST_WIDE_INT w; | |
2551 | ||
7b13ee6b | 2552 | do_alg_addsub_t_m2: |
819126a6 RK |
2553 | for (w = 1; (w & t) != 0; w <<= 1) |
2554 | ; | |
31031edd | 2555 | /* If T was -1, then W will be zero after the loop. This is another |
c410d49e | 2556 | case where T ends with ...111. Handling this with (T + 1) and |
31031edd JL |
2557 | subtract 1 produces slightly better code and results in algorithm |
2558 | selection much faster than treating it like the ...0111 case | |
2559 | below. */ | |
2560 | if (w == 0 | |
2561 | || (w > 2 | |
2562 | /* Reject the case where t is 3. | |
2563 | Thus we prefer addition in that case. */ | |
2564 | && t != 3)) | |
819126a6 RK |
2565 | { |
2566 | /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ | |
2567 | ||
5322d07e | 2568 | op_cost = add_cost (speed, mode); |
26276705 RS |
2569 | new_limit.cost = best_cost.cost - op_cost; |
2570 | new_limit.latency = best_cost.latency - op_cost; | |
2571 | synth_mult (alg_in, t + 1, &new_limit, mode); | |
b2fb324c | 2572 | |
26276705 RS |
2573 | alg_in->cost.cost += op_cost; |
2574 | alg_in->cost.latency += op_cost; | |
2575 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2576 | { |
b2fb324c | 2577 | struct algorithm *x; |
26276705 | 2578 | best_cost = alg_in->cost; |
b2fb324c | 2579 | x = alg_in, alg_in = best_alg, best_alg = x; |
819126a6 RK |
2580 | best_alg->log[best_alg->ops] = 0; |
2581 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
44037a66 | 2582 | } |
44037a66 | 2583 | } |
819126a6 RK |
2584 | else |
2585 | { | |
2586 | /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */ | |
44037a66 | 2587 | |
5322d07e | 2588 | op_cost = add_cost (speed, mode); |
26276705 RS |
2589 | new_limit.cost = best_cost.cost - op_cost; |
2590 | new_limit.latency = best_cost.latency - op_cost; | |
2591 | synth_mult (alg_in, t - 1, &new_limit, mode); | |
819126a6 | 2592 | |
26276705 RS |
2593 | alg_in->cost.cost += op_cost; |
2594 | alg_in->cost.latency += op_cost; | |
2595 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
819126a6 RK |
2596 | { |
2597 | struct algorithm *x; | |
26276705 | 2598 | best_cost = alg_in->cost; |
819126a6 RK |
2599 | x = alg_in, alg_in = best_alg, best_alg = x; |
2600 | best_alg->log[best_alg->ops] = 0; | |
2601 | best_alg->op[best_alg->ops] = alg_add_t_m2; | |
819126a6 RK |
2602 | } |
2603 | } | |
ef268d34 KH |
2604 | |
2605 | /* We may be able to calculate a * -7, a * -15, a * -31, etc | |
2606 | quickly with a - a * n for some appropriate constant n. */ | |
2607 | m = exact_log2 (-orig_t + 1); | |
2608 | if (m >= 0 && m < maxm) | |
2609 | { | |
5322d07e | 2610 | op_cost = shiftsub1_cost (speed, mode, m); |
ef268d34 KH |
2611 | new_limit.cost = best_cost.cost - op_cost; |
2612 | new_limit.latency = best_cost.latency - op_cost; | |
84ddb681 RH |
2613 | synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, |
2614 | &new_limit, mode); | |
ef268d34 KH |
2615 | |
2616 | alg_in->cost.cost += op_cost; | |
2617 | alg_in->cost.latency += op_cost; | |
2618 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
2619 | { | |
2620 | struct algorithm *x; | |
2621 | best_cost = alg_in->cost; | |
2622 | x = alg_in, alg_in = best_alg, best_alg = x; | |
2623 | best_alg->log[best_alg->ops] = m; | |
2624 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
2625 | } | |
2626 | } | |
2627 | ||
7b13ee6b KH |
2628 | if (cache_hit) |
2629 | goto done; | |
819126a6 | 2630 | } |
63610db9 | 2631 | |
44037a66 | 2632 | /* Look for factors of t of the form |
7963ac37 | 2633 | t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |
44037a66 | 2634 | If we find such a factor, we can multiply by t using an algorithm that |
7963ac37 | 2635 | multiplies by q, shift the result by m and add/subtract it to itself. |
44037a66 | 2636 | |
7963ac37 RK |
2637 | We search for large factors first and loop down, even if large factors |
2638 | are less probable than small; if we find a large factor we will find a | |
2639 | good sequence quickly, and therefore be able to prune (by decreasing | |
2640 | COST_LIMIT) the search. */ | |
2641 | ||
7b13ee6b | 2642 | do_alg_addsub_factor: |
7963ac37 | 2643 | for (m = floor_log2 (t - 1); m >= 2; m--) |
44037a66 | 2644 | { |
7963ac37 | 2645 | unsigned HOST_WIDE_INT d; |
44037a66 | 2646 | |
7963ac37 | 2647 | d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; |
7b13ee6b KH |
2648 | if (t % d == 0 && t > d && m < maxm |
2649 | && (!cache_hit || cache_alg == alg_add_factor)) | |
44037a66 | 2650 | { |
26276705 RS |
2651 | /* If the target has a cheap shift-and-add instruction use |
2652 | that in preference to a shift insn followed by an add insn. | |
2653 | Assume that the shift-and-add is "atomic" with a latency | |
a37739c1 | 2654 | equal to its cost, otherwise assume that on superscalar |
26276705 RS |
2655 | hardware the shift may be executed concurrently with the |
2656 | earlier steps in the algorithm. */ | |
5322d07e NF |
2657 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
2658 | if (shiftadd_cost (speed, mode, m) < op_cost) | |
26276705 | 2659 | { |
5322d07e | 2660 | op_cost = shiftadd_cost (speed, mode, m); |
26276705 RS |
2661 | op_latency = op_cost; |
2662 | } | |
2663 | else | |
5322d07e | 2664 | op_latency = add_cost (speed, mode); |
26276705 RS |
2665 | |
2666 | new_limit.cost = best_cost.cost - op_cost; | |
2667 | new_limit.latency = best_cost.latency - op_latency; | |
2668 | synth_mult (alg_in, t / d, &new_limit, mode); | |
44037a66 | 2669 | |
26276705 RS |
2670 | alg_in->cost.cost += op_cost; |
2671 | alg_in->cost.latency += op_latency; | |
2672 | if (alg_in->cost.latency < op_cost) | |
2673 | alg_in->cost.latency = op_cost; | |
2674 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2675 | { |
7963ac37 | 2676 | struct algorithm *x; |
26276705 | 2677 | best_cost = alg_in->cost; |
7963ac37 | 2678 | x = alg_in, alg_in = best_alg, best_alg = x; |
b385aeda | 2679 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2680 | best_alg->op[best_alg->ops] = alg_add_factor; |
44037a66 | 2681 | } |
c0b262c1 TG |
2682 | /* Other factors will have been taken care of in the recursion. */ |
2683 | break; | |
44037a66 TG |
2684 | } |
2685 | ||
7963ac37 | 2686 | d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; |
7b13ee6b KH |
2687 | if (t % d == 0 && t > d && m < maxm |
2688 | && (!cache_hit || cache_alg == alg_sub_factor)) | |
44037a66 | 2689 | { |
26276705 RS |
2690 | /* If the target has a cheap shift-and-subtract insn use |
2691 | that in preference to a shift insn followed by a sub insn. | |
2692 | Assume that the shift-and-sub is "atomic" with a latency | |
2693 | equal to it's cost, otherwise assume that on superscalar | |
2694 | hardware the shift may be executed concurrently with the | |
2695 | earlier steps in the algorithm. */ | |
5322d07e NF |
2696 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
2697 | if (shiftsub0_cost (speed, mode, m) < op_cost) | |
26276705 | 2698 | { |
5322d07e | 2699 | op_cost = shiftsub0_cost (speed, mode, m); |
26276705 RS |
2700 | op_latency = op_cost; |
2701 | } | |
2702 | else | |
5322d07e | 2703 | op_latency = add_cost (speed, mode); |
26276705 RS |
2704 | |
2705 | new_limit.cost = best_cost.cost - op_cost; | |
417c735c | 2706 | new_limit.latency = best_cost.latency - op_latency; |
26276705 | 2707 | synth_mult (alg_in, t / d, &new_limit, mode); |
44037a66 | 2708 | |
26276705 RS |
2709 | alg_in->cost.cost += op_cost; |
2710 | alg_in->cost.latency += op_latency; | |
2711 | if (alg_in->cost.latency < op_cost) | |
2712 | alg_in->cost.latency = op_cost; | |
2713 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2714 | { |
7963ac37 | 2715 | struct algorithm *x; |
26276705 | 2716 | best_cost = alg_in->cost; |
7963ac37 | 2717 | x = alg_in, alg_in = best_alg, best_alg = x; |
b385aeda | 2718 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2719 | best_alg->op[best_alg->ops] = alg_sub_factor; |
44037a66 | 2720 | } |
c0b262c1 | 2721 | break; |
44037a66 TG |
2722 | } |
2723 | } | |
7b13ee6b KH |
2724 | if (cache_hit) |
2725 | goto done; | |
44037a66 | 2726 | |
7963ac37 RK |
2727 | /* Try shift-and-add (load effective address) instructions, |
2728 | i.e. do a*3, a*5, a*9. */ | |
2729 | if ((t & 1) != 0) | |
2730 | { | |
7b13ee6b | 2731 | do_alg_add_t2_m: |
7963ac37 RK |
2732 | q = t - 1; |
2733 | q = q & -q; | |
2734 | m = exact_log2 (q); | |
0792ab19 | 2735 | if (m >= 0 && m < maxm) |
b385aeda | 2736 | { |
5322d07e | 2737 | op_cost = shiftadd_cost (speed, mode, m); |
26276705 RS |
2738 | new_limit.cost = best_cost.cost - op_cost; |
2739 | new_limit.latency = best_cost.latency - op_cost; | |
2740 | synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); | |
2741 | ||
2742 | alg_in->cost.cost += op_cost; | |
2743 | alg_in->cost.latency += op_cost; | |
2744 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb RK |
2745 | { |
2746 | struct algorithm *x; | |
26276705 | 2747 | best_cost = alg_in->cost; |
5eebe2eb RK |
2748 | x = alg_in, alg_in = best_alg, best_alg = x; |
2749 | best_alg->log[best_alg->ops] = m; | |
819126a6 | 2750 | best_alg->op[best_alg->ops] = alg_add_t2_m; |
5eebe2eb | 2751 | } |
7963ac37 | 2752 | } |
7b13ee6b KH |
2753 | if (cache_hit) |
2754 | goto done; | |
44037a66 | 2755 | |
7b13ee6b | 2756 | do_alg_sub_t2_m: |
7963ac37 RK |
2757 | q = t + 1; |
2758 | q = q & -q; | |
2759 | m = exact_log2 (q); | |
0792ab19 | 2760 | if (m >= 0 && m < maxm) |
b385aeda | 2761 | { |
5322d07e | 2762 | op_cost = shiftsub0_cost (speed, mode, m); |
26276705 RS |
2763 | new_limit.cost = best_cost.cost - op_cost; |
2764 | new_limit.latency = best_cost.latency - op_cost; | |
2765 | synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); | |
2766 | ||
2767 | alg_in->cost.cost += op_cost; | |
2768 | alg_in->cost.latency += op_cost; | |
2769 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb RK |
2770 | { |
2771 | struct algorithm *x; | |
26276705 | 2772 | best_cost = alg_in->cost; |
5eebe2eb RK |
2773 | x = alg_in, alg_in = best_alg, best_alg = x; |
2774 | best_alg->log[best_alg->ops] = m; | |
819126a6 | 2775 | best_alg->op[best_alg->ops] = alg_sub_t2_m; |
5eebe2eb | 2776 | } |
7963ac37 | 2777 | } |
7b13ee6b KH |
2778 | if (cache_hit) |
2779 | goto done; | |
7963ac37 | 2780 | } |
44037a66 | 2781 | |
7b13ee6b | 2782 | done: |
3ab0f290 DJ |
2783 | /* If best_cost has not decreased, we have not found any algorithm. */ |
2784 | if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) | |
0178027c KH |
2785 | { |
2786 | /* We failed to find an algorithm. Record alg_impossible for | |
2787 | this case (that is, <T, MODE, COST_LIMIT>) so that next time | |
2788 | we are asked to find an algorithm for T within the same or | |
2789 | lower COST_LIMIT, we can immediately return to the | |
2790 | caller. */ | |
5322d07e NF |
2791 | entry_ptr->t = t; |
2792 | entry_ptr->mode = mode; | |
2793 | entry_ptr->speed = speed; | |
2794 | entry_ptr->alg = alg_impossible; | |
2795 | entry_ptr->cost = *cost_limit; | |
0178027c KH |
2796 | return; |
2797 | } | |
3ab0f290 | 2798 | |
7b13ee6b KH |
2799 | /* Cache the result. */ |
2800 | if (!cache_hit) | |
2801 | { | |
5322d07e NF |
2802 | entry_ptr->t = t; |
2803 | entry_ptr->mode = mode; | |
2804 | entry_ptr->speed = speed; | |
2805 | entry_ptr->alg = best_alg->op[best_alg->ops]; | |
2806 | entry_ptr->cost.cost = best_cost.cost; | |
2807 | entry_ptr->cost.latency = best_cost.latency; | |
7b13ee6b KH |
2808 | } |
2809 | ||
52786026 RK |
2810 | /* If we are getting a too long sequence for `struct algorithm' |
2811 | to record, make this search fail. */ | |
2812 | if (best_alg->ops == MAX_BITS_PER_WORD) | |
2813 | return; | |
2814 | ||
819126a6 RK |
2815 | /* Copy the algorithm from temporary space to the space at alg_out. |
2816 | We avoid using structure assignment because the majority of | |
2817 | best_alg is normally undefined, and this is a critical function. */ | |
2818 | alg_out->ops = best_alg->ops + 1; | |
26276705 | 2819 | alg_out->cost = best_cost; |
4e135bdd KG |
2820 | memcpy (alg_out->op, best_alg->op, |
2821 | alg_out->ops * sizeof *alg_out->op); | |
2822 | memcpy (alg_out->log, best_alg->log, | |
2823 | alg_out->ops * sizeof *alg_out->log); | |
44037a66 TG |
2824 | } |
2825 | \f | |
d1a6adeb | 2826 | /* Find the cheapest way of multiplying a value of mode MODE by VAL. |
8efc8980 RS |
2827 | Try three variations: |
2828 | ||
2829 | - a shift/add sequence based on VAL itself | |
2830 | - a shift/add sequence based on -VAL, followed by a negation | |
2831 | - a shift/add sequence based on VAL - 1, followed by an addition. | |
2832 | ||
f258e38b UW |
2833 | Return true if the cheapest of these cost less than MULT_COST, |
2834 | describing the algorithm in *ALG and final fixup in *VARIANT. */ | |
8efc8980 RS |
2835 | |
2836 | static bool | |
ef4bddc2 | 2837 | choose_mult_variant (machine_mode mode, HOST_WIDE_INT val, |
f258e38b UW |
2838 | struct algorithm *alg, enum mult_variant *variant, |
2839 | int mult_cost) | |
8efc8980 | 2840 | { |
8efc8980 | 2841 | struct algorithm alg2; |
26276705 RS |
2842 | struct mult_cost limit; |
2843 | int op_cost; | |
f40751dd | 2844 | bool speed = optimize_insn_for_speed_p (); |
8efc8980 | 2845 | |
18eaea7f RS |
2846 | /* Fail quickly for impossible bounds. */ |
2847 | if (mult_cost < 0) | |
2848 | return false; | |
2849 | ||
2850 | /* Ensure that mult_cost provides a reasonable upper bound. | |
2851 | Any constant multiplication can be performed with less | |
2852 | than 2 * bits additions. */ | |
5322d07e | 2853 | op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); |
18eaea7f RS |
2854 | if (mult_cost > op_cost) |
2855 | mult_cost = op_cost; | |
2856 | ||
8efc8980 | 2857 | *variant = basic_variant; |
26276705 RS |
2858 | limit.cost = mult_cost; |
2859 | limit.latency = mult_cost; | |
2860 | synth_mult (alg, val, &limit, mode); | |
8efc8980 RS |
2861 | |
2862 | /* This works only if the inverted value actually fits in an | |
2863 | `unsigned int' */ | |
84ddb681 | 2864 | if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) |
8efc8980 | 2865 | { |
c3284718 | 2866 | op_cost = neg_cost (speed, mode); |
26276705 RS |
2867 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |
2868 | { | |
2869 | limit.cost = alg->cost.cost - op_cost; | |
2870 | limit.latency = alg->cost.latency - op_cost; | |
2871 | } | |
2872 | else | |
2873 | { | |
2874 | limit.cost = mult_cost - op_cost; | |
2875 | limit.latency = mult_cost - op_cost; | |
2876 | } | |
2877 | ||
2878 | synth_mult (&alg2, -val, &limit, mode); | |
2879 | alg2.cost.cost += op_cost; | |
2880 | alg2.cost.latency += op_cost; | |
2881 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) | |
8efc8980 RS |
2882 | *alg = alg2, *variant = negate_variant; |
2883 | } | |
2884 | ||
2885 | /* This proves very useful for division-by-constant. */ | |
5322d07e | 2886 | op_cost = add_cost (speed, mode); |
26276705 RS |
2887 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |
2888 | { | |
2889 | limit.cost = alg->cost.cost - op_cost; | |
2890 | limit.latency = alg->cost.latency - op_cost; | |
2891 | } | |
2892 | else | |
2893 | { | |
2894 | limit.cost = mult_cost - op_cost; | |
2895 | limit.latency = mult_cost - op_cost; | |
2896 | } | |
2897 | ||
2898 | synth_mult (&alg2, val - 1, &limit, mode); | |
2899 | alg2.cost.cost += op_cost; | |
2900 | alg2.cost.latency += op_cost; | |
2901 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) | |
8efc8980 RS |
2902 | *alg = alg2, *variant = add_variant; |
2903 | ||
26276705 | 2904 | return MULT_COST_LESS (&alg->cost, mult_cost); |
8efc8980 RS |
2905 | } |
2906 | ||
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.

   Throughout, VAL_SO_FAR tracks the constant that the emitted sequence
   has multiplied OP0 by so far; at the end it must equal VAL (modulo
   the mode mask), which is checked by an assertion.  */

static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
		   rtx target, const struct algorithm *alg,
		   enum mult_variant variant)
{
  HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When optimizing, let force_operand/expand_shift pick targets;
	 otherwise reuse ACCUM (and TARGET for the last step) to keep
	 the register count down.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
	   && !optimize)
	  ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
	{
	case alg_shift:
	  /* accum <<= log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  /* REG_EQUAL note will be attached to the following insn.  */
	  emit_move_insn (accum, tem);
	  val_so_far <<= log;
	  break;

	case alg_add_t_m2:
	  /* accum += op0 << log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += (HOST_WIDE_INT) 1 << log;
	  break;

	case alg_sub_t_m2:
	  /* accum -= op0 << log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far -= (HOST_WIDE_INT) 1 << log;
	  break;

	case alg_add_t2_m:
	  /* accum = (accum << log) + op0.  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) + 1;
	  break;

	case alg_sub_t2_m:
	  /* accum = (accum << log) - op0.  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) - 1;
	  break;

	case alg_add_factor:
	  /* accum += accum << log, i.e. accum *= (1 + 2^log).  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += val_so_far << log;
	  break;

	case alg_sub_factor:
	  /* accum = (accum << log) - accum, i.e. accum *= (2^log - 1).  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
				 (add_target
				  ? add_target : (optimize ? 0 : tem)));
	  val_so_far = (val_so_far << log) - val_so_far;
	  break;

	default:
	  gcc_unreachable ();
	}

      if (SCALAR_INT_MODE_P (mode))
	{
	  /* Write a REG_EQUAL note on the last insn so that we can cse
	     multiplication sequences.  Note that if ACCUM is a SUBREG,
	     we've set the inner register and must properly indicate that.  */
	  tem = op0, nmode = mode;
	  accum_inner = accum;
	  if (GET_CODE (accum) == SUBREG)
	    {
	      accum_inner = SUBREG_REG (accum);
	      nmode = GET_MODE (accum_inner);
	      tem = gen_lowpart (nmode, op0);
	    }

	  insn = get_last_insn ();
	  set_dst_reg_note (insn, REG_EQUAL,
			    gen_rtx_MULT (nmode, tem,
					  gen_int_mode (val_so_far, nmode)),
			    accum_inner);
	}
    }

  /* Apply the final fixup chosen by choose_mult_variant: either the
     sequence computed -VAL and must be negated, or it computed VAL-1
     and OP0 must be added once more.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  if (nmode == VOIDmode)
    nmode = mode;
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == val_so_far);

  return accum;
}
3056 | ||
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  /* Trapping-on-overflow multiply is only meaningful for signed scalar
     integer modes; it disables the shift/add synthesis below.  */
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  /* Canonicalize so that any constant operand ends up in OP1.  */
  if (CONSTANT_P (op0))
    {
      rtx temp = op0;
      op0 = op1;
      op1 = temp;
    }

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = op1;
  if (GET_CODE (op1) == CONST_VECTOR)
    {
      int i, n = CONST_VECTOR_NUNITS (op1);
      scalar_op1 = CONST_VECTOR_ELT (op1, 0);
      for (i = 1; i < n; ++i)
	if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
	  goto skip_scalar;
    }

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial constants: x*0, x*1 and x*-1.  */
      if (op1 == CONST0_RTX (mode))
	return op1;
      if (op1 == CONST1_RTX (mode))
	return op0;
      if (op1 == CONSTM1_RTX (mode))
	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
			    op0, target, 0);

      if (do_trapv)
	goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
	 vector lshift (by scalar or vector) at all.  If not, we can't use
	 synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
	goto skip_synth;

      /* These are the operations that are potentially turned into
	 a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
	{
	  coeff = INTVAL (scalar_op1);
	  is_neg = coeff < 0;
	}
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
	{
	  int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
	  /* Perfect power of 2 (other than 1, which is handled above).  */
	  if (shift > 0)
	    return expand_shift (LSHIFT_EXPR, mode, op0,
				 shift, target, unsignedp);
	  else
	    goto skip_synth;
	}
      else
	goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
	return expand_shift (LSHIFT_EXPR, mode, op0,
			     floor_log2 (coeff), target, unsignedp);

      /* FAKE_REG stands in for OP0 in the cost estimate below, so the
	 cost of materializing OP0 itself is excluded.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
	 coefficients, by performing the multiplication by a positive
	 multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
	{
	  /* It's safe to use -coeff even for INT_MIN, as the
	     result is interpreted as an unsigned coefficient.
	     Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
	  coeff = -(unsigned HOST_WIDE_INT) coeff;
	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
		      - neg_cost (speed, mode));
	  if (max_cost <= 0)
	    goto skip_synth;

	  /* Special case powers of two.  */
	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	    {
	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
				       floor_log2 (coeff), target, unsignedp);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }

	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
				   max_cost))
	    {
	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
					    &algorithm, variant);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }
	  goto skip_synth;
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
	return expand_mult_const (mode, op0, coeff, target,
				  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
    {
      REAL_VALUE_TYPE d;
      REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);

      if (REAL_VALUES_EQUAL (d, dconst2))
	{
	  op0 = force_reg (GET_MODE (op0), op0);
	  return expand_binop (mode, add_optab, op0, op0,
			       target, unsignedp, OPTAB_LIB_WIDEN);
	}
    }
 skip_scalar:

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
5b58b39b | 3230 | |
6dd8f4bb BS |
3231 | /* Return a cost estimate for multiplying a register by the given |
3232 | COEFFicient in the given MODE and SPEED. */ | |
3233 | ||
3234 | int | |
ef4bddc2 | 3235 | mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) |
6dd8f4bb BS |
3236 | { |
3237 | int max_cost; | |
3238 | struct algorithm algorithm; | |
3239 | enum mult_variant variant; | |
3240 | ||
3241 | rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); | |
3242 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed); | |
3243 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) | |
3244 | return algorithm.cost.cost; | |
3245 | else | |
3246 | return max_cost; | |
3247 | } | |
3248 | ||
/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
   or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds.  */

rtx
expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
		      int unsignedp, int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  /* A shift/add synthesis is only attempted when OP1 is a CONST_INT
     that survives widening to MODE unchanged (CONST_INT_P (cop1)), and
     whose value is representable without ambiguity: nonnegative, or in
     a mode narrow enough for a HOST_WIDE_INT to hold it exactly.  */
  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
				this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
	  || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_shift (LSHIFT_EXPR, mode, op0,
			       floor_log2 (coeff), target, unsignedp);
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = mul_widen_cost (speed, mode);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
			       max_cost))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_mult_const (mode, op0, coeff, target,
				    &algorithm, variant);
	}
    }
  /* Fall back to the widening multiply instruction / libcall.  */
  return expand_binop (mode, this_optab, op0, op1, target,
		       unsignedp, OPTAB_LIB_WIDEN);
}
44037a66 | 3301 | \f |
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.

   NOTE(review): this appears to follow the classic division-by-invariant-
   integers construction (cf. Granlund & Montgomery, PLDI '94) -- mlow and
   mhigh bracket the scaled reciprocal, and the loop strips common trailing
   bits while the bracket still agrees.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
{
  int lgup, post_shift;
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(N + lgup)/d */
  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  Each iteration halves both bounds (dropping
     bit 0) and decrements the post shift, stopping as soon as the halved
     bounds would coincide and no longer bracket the reciprocal.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
						       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
						       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
	break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      /* The N+1'st bit: set exactly when mhigh needs more than N bits.  */
      return mhigh.to_uhwi () >= mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
    }
}
3373 | ||
3374 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is | |
3375 | congruent to 1 (mod 2**N). */ | |
3376 | ||
3377 | static unsigned HOST_WIDE_INT | |
502b8322 | 3378 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |
55c2d311 | 3379 | { |
0f41302f | 3380 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |
55c2d311 TG |
3381 | |
3382 | /* The algorithm notes that the choice y = x satisfies | |
3383 | x*y == 1 mod 2^3, since x is assumed odd. | |
3384 | Each iteration doubles the number of bits of significance in y. */ | |
3385 | ||
3386 | unsigned HOST_WIDE_INT mask; | |
3387 | unsigned HOST_WIDE_INT y = x; | |
3388 | int nbit = 3; | |
3389 | ||
3390 | mask = (n == HOST_BITS_PER_WIDE_INT | |
3391 | ? ~(unsigned HOST_WIDE_INT) 0 | |
3392 | : ((unsigned HOST_WIDE_INT) 1 << n) - 1); | |
3393 | ||
3394 | while (nbit < n) | |
3395 | { | |
3396 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ | |
3397 | nbit *= 2; | |
3398 | } | |
3399 | return y; | |
3400 | } | |
3401 | ||
3402 | /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness | |
3403 | flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the | |
3404 | product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product | |
3405 | to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to | |
3406 | become signed. | |
3407 | ||
3408 | The result is put in TARGET if that is convenient. | |
3409 | ||
3410 | MODE is the mode of operation. */ | |
3411 | ||
3412 | rtx | |
ef4bddc2 | 3413 | expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0, |
502b8322 | 3414 | rtx op1, rtx target, int unsignedp) |
55c2d311 TG |
3415 | { |
3416 | rtx tem; | |
3417 | enum rtx_code adj_code = unsignedp ? PLUS : MINUS; | |
3418 | ||
3419 | tem = expand_shift (RSHIFT_EXPR, mode, op0, | |
eb6c3df1 | 3420 | GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); |
22273300 | 3421 | tem = expand_and (mode, tem, op1, NULL_RTX); |
38a448ca RH |
3422 | adj_operand |
3423 | = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), | |
3424 | adj_operand); | |
55c2d311 TG |
3425 | |
3426 | tem = expand_shift (RSHIFT_EXPR, mode, op1, | |
eb6c3df1 | 3427 | GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); |
22273300 | 3428 | tem = expand_and (mode, tem, op0, NULL_RTX); |
38a448ca RH |
3429 | target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |
3430 | target); | |
55c2d311 TG |
3431 | |
3432 | return target; | |
3433 | } | |
3434 | ||
00f07b86 | 3435 | /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ |
55c2d311 | 3436 | |
8efc8980 | 3437 | static rtx |
ef4bddc2 | 3438 | extract_high_half (machine_mode mode, rtx op) |
8efc8980 | 3439 | { |
ef4bddc2 | 3440 | machine_mode wider_mode; |
55c2d311 | 3441 | |
8efc8980 RS |
3442 | if (mode == word_mode) |
3443 | return gen_highpart (mode, op); | |
71af73bb | 3444 | |
15ed7b52 JG |
3445 | gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); |
3446 | ||
8efc8980 RS |
3447 | wider_mode = GET_MODE_WIDER_MODE (mode); |
3448 | op = expand_shift (RSHIFT_EXPR, wider_mode, op, | |
eb6c3df1 | 3449 | GET_MODE_BITSIZE (mode), 0, 1); |
8efc8980 RS |
3450 | return convert_modes (mode, wider_mode, op, 0); |
3451 | } | |
55c2d311 | 3452 | |
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   Five strategies are tried in order, each guarded by a cost estimate
   against MAX_COST:
     1. a direct highpart multiply of the right signedness;
     2. a direct highpart multiply of the wrong signedness, fixed up;
     3. a widening multiply, keeping the high half;
     4. a plain multiply in the wider mode, keeping the high half;
     5. a widening multiply of the wrong signedness, fixed up.
   Returns 0 if none of them is available/cheap enough.  */

static rtx
expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The cost budget
     allows for the two shifts and up to four adds of the adjustment.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  /* Only commit the conversion/multiply sequence on success.  */
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  return 0;
}
71af73bb | 3559 | |
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation can not be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  /* View the constant as an unsigned MODE-width value.  */
  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  /* The synthesized product lands in the wider mode; extracting the
     high half needs one full-width shift, budgeted here.  */
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      /* The unsigned synthesis then needs one subtract to fix up the
	 sign (see the force_operand of MINUS below).  */
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      /* Widen, synthesize the constant multiply, keep the high half.  */
      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
0b55e932 RS |
3631 | |
3632 | ||
3633 | /* Expand signed modulus of OP0 by a power of two D in mode MODE. */ | |
3634 | ||
3635 | static rtx | |
ef4bddc2 | 3636 | expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d) |
0b55e932 | 3637 | { |
f3f6fb16 DM |
3638 | rtx result, temp, shift; |
3639 | rtx_code_label *label; | |
0b55e932 | 3640 | int logd; |
807e902e | 3641 | int prec = GET_MODE_PRECISION (mode); |
0b55e932 RS |
3642 | |
3643 | logd = floor_log2 (d); | |
3644 | result = gen_reg_rtx (mode); | |
3645 | ||
3646 | /* Avoid conditional branches when they're expensive. */ | |
3a4fd356 | 3647 | if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2 |
22660666 | 3648 | && optimize_insn_for_speed_p ()) |
0b55e932 RS |
3649 | { |
3650 | rtx signmask = emit_store_flag (result, LT, op0, const0_rtx, | |
3651 | mode, 0, -1); | |
3652 | if (signmask) | |
3653 | { | |
807e902e | 3654 | HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1; |
0b55e932 | 3655 | signmask = force_reg (mode, signmask); |
1c234fcb RS |
3656 | shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd); |
3657 | ||
3658 | /* Use the rtx_cost of a LSHIFTRT instruction to determine | |
3659 | which instruction sequence to use. If logical right shifts | |
3660 | are expensive the use 2 XORs, 2 SUBs and an AND, otherwise | |
3661 | use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */ | |
6e7a355c | 3662 | |
1c234fcb | 3663 | temp = gen_rtx_LSHIFTRT (mode, result, shift); |
947131ba | 3664 | if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing |
5e8f01f4 RS |
3665 | || (set_src_cost (temp, optimize_insn_for_speed_p ()) |
3666 | > COSTS_N_INSNS (2))) | |
1c234fcb RS |
3667 | { |
3668 | temp = expand_binop (mode, xor_optab, op0, signmask, | |
3669 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3670 | temp = expand_binop (mode, sub_optab, temp, signmask, | |
3671 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
2f1cd2eb RS |
3672 | temp = expand_binop (mode, and_optab, temp, |
3673 | gen_int_mode (masklow, mode), | |
1c234fcb RS |
3674 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
3675 | temp = expand_binop (mode, xor_optab, temp, signmask, | |
3676 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3677 | temp = expand_binop (mode, sub_optab, temp, signmask, | |
3678 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3679 | } | |
3680 | else | |
3681 | { | |
3682 | signmask = expand_binop (mode, lshr_optab, signmask, shift, | |
3683 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3684 | signmask = force_reg (mode, signmask); | |
3685 | ||
3686 | temp = expand_binop (mode, add_optab, op0, signmask, | |
3687 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
2f1cd2eb RS |
3688 | temp = expand_binop (mode, and_optab, temp, |
3689 | gen_int_mode (masklow, mode), | |
1c234fcb RS |
3690 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
3691 | temp = expand_binop (mode, sub_optab, temp, signmask, | |
3692 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3693 | } | |
0b55e932 RS |
3694 | return temp; |
3695 | } | |
3696 | } | |
3697 | ||
3698 | /* Mask contains the mode's signbit and the significant bits of the | |
3699 | modulus. By including the signbit in the operation, many targets | |
3700 | can avoid an explicit compare operation in the following comparison | |
3701 | against zero. */ | |
807e902e KZ |
3702 | wide_int mask = wi::mask (logd, false, prec); |
3703 | mask = wi::set_bit (mask, prec - 1); | |
0b55e932 | 3704 | |
6e7a355c | 3705 | temp = expand_binop (mode, and_optab, op0, |
807e902e | 3706 | immed_wide_int_const (mask, mode), |
6e7a355c | 3707 | result, 1, OPTAB_LIB_WIDEN); |
0b55e932 RS |
3708 | if (temp != result) |
3709 | emit_move_insn (result, temp); | |
3710 | ||
3711 | label = gen_label_rtx (); | |
3712 | do_cmp_and_jump (result, const0_rtx, GE, mode, label); | |
3713 | ||
3714 | temp = expand_binop (mode, sub_optab, result, const1_rtx, result, | |
3715 | 0, OPTAB_LIB_WIDEN); | |
807e902e KZ |
3716 | |
3717 | mask = wi::mask (logd, true, prec); | |
6e7a355c | 3718 | temp = expand_binop (mode, ior_optab, temp, |
807e902e | 3719 | immed_wide_int_const (mask, mode), |
6e7a355c | 3720 | result, 1, OPTAB_LIB_WIDEN); |
0b55e932 RS |
3721 | temp = expand_binop (mode, add_optab, temp, const1_rtx, result, |
3722 | 0, OPTAB_LIB_WIDEN); | |
3723 | if (temp != result) | |
3724 | emit_move_insn (result, temp); | |
3725 | emit_label (label); | |
3726 | return result; | |
3727 | } | |
39cab019 RS |
3728 | |
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.

   Signed division by 2**logd rounds toward zero, so a negative OP0
   must be biased by D - 1 before the arithmetic right shift.  The
   alternatives below differ only in how that bias is applied, chosen
   by BRANCH_COST and per-insn cost queries.  */

static rtx
expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  /* For D == 2 the bias is exactly the sign condition (0 or 1), which
     emit_store_flag can often produce without a branch.  */
  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

#ifdef HAVE_conditional_move
  /* Try a conditional move: select OP0 + (D - 1) when OP0 < 0, else
     OP0, then shift.  Emitted into a tentative sequence that is only
     committed if emit_conditional_move succeeds.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false)
      >= 2)
    {
      rtx temp2;

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      /* Conditional move not available; discard the sequence.  */
      end_sequence ();
    }
#endif

  /* Branch-free bias via a store-flag: TEMP is all-ones when OP0 < 0.
     Turn it into the D - 1 bias either by masking with D - 1 or by a
     logical right shift, whichever the cost model prefers.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	     > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Last resort: an explicit branch that skips the bias when OP0 >= 0.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
55c2d311 | 3805 | \f |
44037a66 TG |
3806 | /* Emit the code to divide OP0 by OP1, putting the result in TARGET |
3807 | if that is convenient, and returning where the result is. | |
3808 | You may request either the quotient or the remainder as the result; | |
3809 | specify REM_FLAG nonzero to get the remainder. | |
3810 | ||
3811 | CODE is the expression code for which kind of division this is; | |
3812 | it controls how rounding is done. MODE is the machine mode to use. | |
3813 | UNSIGNEDP nonzero means do unsigned division. */ | |
3814 | ||
3815 | /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI | |
3816 | and then correct it by or'ing in missing high bits | |
3817 | if result of ANDI is nonzero. | |
3818 | For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. | |
3819 | This could optimize to a bfexts instruction. | |
3820 | But C doesn't use these operations, so their optimizations are | |
3821 | left for later. */ | |
5353610b R |
3822 | /* ??? For modulo, we don't actually need the highpart of the first product, |
3823 | the low part will do nicely. And for small divisors, the second multiply | |
3824 | can also be a low-part only multiply or even be completely left out. | |
3825 | E.g. to calculate the remainder of a division by 3 with a 32 bit | |
3826 | multiply, multiply with 0x55555556 and extract the upper two bits; | |
3827 | the result is exact for inputs up to 0x1fffffff. | |
3828 | The input range can be reduced by using cross-sum rules. | |
3829 | For odd divisors >= 3, the following table gives right shift counts | |
09da1532 | 3830 | so that if a number is shifted by an integer multiple of the given |
5353610b R |
3831 | amount, the remainder stays the same: |
3832 | 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, | |
3833 | 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, | |
3834 | 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, | |
3835 | 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, | |
3836 | 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 | |
3837 | ||
3838 | Cross-sum rules for even numbers can be derived by leaving as many bits | |
3839 | to the right alone as the divisor has zeros to the right. | |
3840 | E.g. if x is an unsigned 32 bit number: | |
3841 | (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 | |
3842 | */ | |
44037a66 TG |
3843 | |
3844 | rtx | |
ef4bddc2 | 3845 | expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, |
502b8322 | 3846 | rtx op0, rtx op1, rtx target, int unsignedp) |
44037a66 | 3847 | { |
ef4bddc2 | 3848 | machine_mode compute_mode; |
b3694847 | 3849 | rtx tquotient; |
55c2d311 | 3850 | rtx quotient = 0, remainder = 0; |
f3f6fb16 | 3851 | rtx_insn *last; |
2c414fba | 3852 | int size; |
f3f6fb16 | 3853 | rtx_insn *insn; |
44037a66 | 3854 | optab optab1, optab2; |
1c4a429a | 3855 | int op1_is_constant, op1_is_pow2 = 0; |
71af73bb | 3856 | int max_cost, extra_cost; |
9ec36da5 | 3857 | static HOST_WIDE_INT last_div_const = 0; |
f40751dd | 3858 | bool speed = optimize_insn_for_speed_p (); |
55c2d311 | 3859 | |
481683e1 | 3860 | op1_is_constant = CONST_INT_P (op1); |
1c4a429a JH |
3861 | if (op1_is_constant) |
3862 | { | |
be63b77d | 3863 | unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1); |
1c4a429a JH |
3864 | if (unsignedp) |
3865 | ext_op1 &= GET_MODE_MASK (mode); | |
3866 | op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1) | |
3867 | || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1)))); | |
3868 | } | |
55c2d311 TG |
3869 | |
3870 | /* | |
3871 | This is the structure of expand_divmod: | |
3872 | ||
3873 | First comes code to fix up the operands so we can perform the operations | |
3874 | correctly and efficiently. | |
3875 | ||
3876 | Second comes a switch statement with code specific for each rounding mode. | |
3877 | For some special operands this code emits all RTL for the desired | |
69f61901 | 3878 | operation, for other cases, it generates only a quotient and stores it in |
55c2d311 TG |
3879 | QUOTIENT. The case for trunc division/remainder might leave quotient = 0, |
3880 | to indicate that it has not done anything. | |
3881 | ||
69f61901 RK |
3882 | Last comes code that finishes the operation. If QUOTIENT is set and |
3883 | REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If | |
3884 | QUOTIENT is not set, it is computed using trunc rounding. | |
44037a66 | 3885 | |
55c2d311 TG |
3886 | We try to generate special code for division and remainder when OP1 is a |
3887 | constant. If |OP1| = 2**n we can use shifts and some other fast | |
3888 | operations. For other values of OP1, we compute a carefully selected | |
3889 | fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 | |
3890 | by m. | |
3891 | ||
3892 | In all cases but EXACT_DIV_EXPR, this multiplication requires the upper | |
3893 | half of the product. Different strategies for generating the product are | |
00f07b86 | 3894 | implemented in expmed_mult_highpart. |
55c2d311 TG |
3895 | |
3896 | If what we actually want is the remainder, we generate that by another | |
3897 | by-constant multiplication and a subtraction. */ | |
3898 | ||
3899 | /* We shouldn't be called with OP1 == const1_rtx, but some of the | |
3d32ffd1 TW |
3900 | code below will malfunction if we are, so check here and handle |
3901 | the special case if so. */ | |
3902 | if (op1 == const1_rtx) | |
3903 | return rem_flag ? const0_rtx : op0; | |
3904 | ||
91ce572a CC |
3905 | /* When dividing by -1, we could get an overflow. |
3906 | negv_optab can handle overflows. */ | |
3907 | if (! unsignedp && op1 == constm1_rtx) | |
3908 | { | |
3909 | if (rem_flag) | |
0fb7aeda | 3910 | return const0_rtx; |
c3284718 | 3911 | return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT |
0fb7aeda | 3912 | ? negv_optab : neg_optab, op0, target, 0); |
91ce572a CC |
3913 | } |
3914 | ||
bc1c7e93 RK |
3915 | if (target |
3916 | /* Don't use the function value register as a target | |
3917 | since we have to read it as well as write it, | |
3918 | and function-inlining gets confused by this. */ | |
3919 | && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) | |
3920 | /* Don't clobber an operand while doing a multi-step calculation. */ | |
515dfc7a | 3921 | || ((rem_flag || op1_is_constant) |
bc1c7e93 | 3922 | && (reg_mentioned_p (target, op0) |
3c0cb5de | 3923 | || (MEM_P (op0) && MEM_P (target)))) |
bc1c7e93 | 3924 | || reg_mentioned_p (target, op1) |
3c0cb5de | 3925 | || (MEM_P (op1) && MEM_P (target)))) |
44037a66 TG |
3926 | target = 0; |
3927 | ||
44037a66 TG |
3928 | /* Get the mode in which to perform this computation. Normally it will |
3929 | be MODE, but sometimes we can't do the desired operation in MODE. | |
3930 | If so, pick a wider mode in which we can do the operation. Convert | |
3931 | to that mode at the start to avoid repeated conversions. | |
3932 | ||
3933 | First see what operations we need. These depend on the expression | |
3934 | we are evaluating. (We assume that divxx3 insns exist under the | |
3935 | same conditions that modxx3 insns and that these insns don't normally | |
3936 | fail. If these assumptions are not correct, we may generate less | |
3937 | efficient code in some cases.) | |
3938 | ||
3939 | Then see if we find a mode in which we can open-code that operation | |
3940 | (either a division, modulus, or shift). Finally, check for the smallest | |
3941 | mode for which we can do the operation with a library call. */ | |
3942 | ||
55c2d311 | 3943 | /* We might want to refine this now that we have division-by-constant |
00f07b86 | 3944 | optimization. Since expmed_mult_highpart tries so many variants, it is |
55c2d311 TG |
3945 | not straightforward to generalize this. Maybe we should make an array |
3946 | of possible modes in init_expmed? Save this for GCC 2.7. */ | |
3947 | ||
556a56ac DM |
3948 | optab1 = ((op1_is_pow2 && op1 != const0_rtx) |
3949 | ? (unsignedp ? lshr_optab : ashr_optab) | |
44037a66 | 3950 | : (unsignedp ? udiv_optab : sdiv_optab)); |
556a56ac DM |
3951 | optab2 = ((op1_is_pow2 && op1 != const0_rtx) |
3952 | ? optab1 | |
3953 | : (unsignedp ? udivmod_optab : sdivmod_optab)); | |
44037a66 TG |
3954 | |
3955 | for (compute_mode = mode; compute_mode != VOIDmode; | |
3956 | compute_mode = GET_MODE_WIDER_MODE (compute_mode)) | |
947131ba RS |
3957 | if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing |
3958 | || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) | |
44037a66 TG |
3959 | break; |
3960 | ||
3961 | if (compute_mode == VOIDmode) | |
3962 | for (compute_mode = mode; compute_mode != VOIDmode; | |
3963 | compute_mode = GET_MODE_WIDER_MODE (compute_mode)) | |
8a33f100 JH |
3964 | if (optab_libfunc (optab1, compute_mode) |
3965 | || optab_libfunc (optab2, compute_mode)) | |
44037a66 TG |
3966 | break; |
3967 | ||
535a42b1 NS |
3968 | /* If we still couldn't find a mode, use MODE, but expand_binop will |
3969 | probably die. */ | |
44037a66 TG |
3970 | if (compute_mode == VOIDmode) |
3971 | compute_mode = mode; | |
3972 | ||
55c2d311 TG |
3973 | if (target && GET_MODE (target) == compute_mode) |
3974 | tquotient = target; | |
3975 | else | |
3976 | tquotient = gen_reg_rtx (compute_mode); | |
2c414fba | 3977 | |
55c2d311 TG |
3978 | size = GET_MODE_BITSIZE (compute_mode); |
3979 | #if 0 | |
3980 | /* It should be possible to restrict the precision to GET_MODE_BITSIZE | |
71af73bb TG |
3981 | (mode), and thereby get better code when OP1 is a constant. Do that |
3982 | later. It will require going over all usages of SIZE below. */ | |
55c2d311 TG |
3983 | size = GET_MODE_BITSIZE (mode); |
3984 | #endif | |
bc1c7e93 | 3985 | |
9ec36da5 JL |
3986 | /* Only deduct something for a REM if the last divide done was |
3987 | for a different constant. Then set the constant of the last | |
3988 | divide. */ | |
5322d07e NF |
3989 | max_cost = (unsignedp |
3990 | ? udiv_cost (speed, compute_mode) | |
3991 | : sdiv_cost (speed, compute_mode)); | |
a28b2ac6 RS |
3992 | if (rem_flag && ! (last_div_const != 0 && op1_is_constant |
3993 | && INTVAL (op1) == last_div_const)) | |
5322d07e NF |
3994 | max_cost -= (mul_cost (speed, compute_mode) |
3995 | + add_cost (speed, compute_mode)); | |
9ec36da5 JL |
3996 | |
3997 | last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; | |
71af73bb | 3998 | |
55c2d311 | 3999 | /* Now convert to the best mode to use. */ |
44037a66 TG |
4000 | if (compute_mode != mode) |
4001 | { | |
55c2d311 | 4002 | op0 = convert_modes (compute_mode, mode, op0, unsignedp); |
81722fa9 | 4003 | op1 = convert_modes (compute_mode, mode, op1, unsignedp); |
e13a25d5 | 4004 | |
e9a25f70 JL |
4005 | /* convert_modes may have placed op1 into a register, so we |
4006 | must recompute the following. */ | |
481683e1 | 4007 | op1_is_constant = CONST_INT_P (op1); |
e13a25d5 DM |
4008 | op1_is_pow2 = (op1_is_constant |
4009 | && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) | |
4010 | || (! unsignedp | |
be63b77d | 4011 | && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1)))))); |
44037a66 TG |
4012 | } |
4013 | ||
55c2d311 | 4014 | /* If one of the operands is a volatile MEM, copy it into a register. */ |
c2a47e48 | 4015 | |
3c0cb5de | 4016 | if (MEM_P (op0) && MEM_VOLATILE_P (op0)) |
55c2d311 | 4017 | op0 = force_reg (compute_mode, op0); |
3c0cb5de | 4018 | if (MEM_P (op1) && MEM_VOLATILE_P (op1)) |
c2a47e48 RK |
4019 | op1 = force_reg (compute_mode, op1); |
4020 | ||
ab0b6581 TG |
4021 | /* If we need the remainder or if OP1 is constant, we need to |
4022 | put OP0 in a register in case it has any queued subexpressions. */ | |
4023 | if (rem_flag || op1_is_constant) | |
4024 | op0 = force_reg (compute_mode, op0); | |
bc1c7e93 | 4025 | |
55c2d311 | 4026 | last = get_last_insn (); |
44037a66 | 4027 | |
9faa82d8 | 4028 | /* Promote floor rounding to trunc rounding for unsigned operations. */ |
55c2d311 | 4029 | if (unsignedp) |
44037a66 | 4030 | { |
55c2d311 TG |
4031 | if (code == FLOOR_DIV_EXPR) |
4032 | code = TRUNC_DIV_EXPR; | |
4033 | if (code == FLOOR_MOD_EXPR) | |
4034 | code = TRUNC_MOD_EXPR; | |
db7cafb0 JL |
4035 | if (code == EXACT_DIV_EXPR && op1_is_pow2) |
4036 | code = TRUNC_DIV_EXPR; | |
55c2d311 | 4037 | } |
bc1c7e93 | 4038 | |
55c2d311 TG |
4039 | if (op1 != const0_rtx) |
4040 | switch (code) | |
4041 | { | |
4042 | case TRUNC_MOD_EXPR: | |
4043 | case TRUNC_DIV_EXPR: | |
34f016ed | 4044 | if (op1_is_constant) |
55c2d311 | 4045 | { |
d8f1376c | 4046 | if (unsignedp) |
55c2d311 | 4047 | { |
079c527f | 4048 | unsigned HOST_WIDE_INT mh, ml; |
55c2d311 TG |
4049 | int pre_shift, post_shift; |
4050 | int dummy; | |
1c4a429a JH |
4051 | unsigned HOST_WIDE_INT d = (INTVAL (op1) |
4052 | & GET_MODE_MASK (compute_mode)); | |
55c2d311 TG |
4053 | |
4054 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4055 | { | |
4056 | pre_shift = floor_log2 (d); | |
4057 | if (rem_flag) | |
4058 | { | |
2f1cd2eb RS |
4059 | unsigned HOST_WIDE_INT mask |
4060 | = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1; | |
db3cf6fb MS |
4061 | remainder |
4062 | = expand_binop (compute_mode, and_optab, op0, | |
2f1cd2eb | 4063 | gen_int_mode (mask, compute_mode), |
db3cf6fb MS |
4064 | remainder, 1, |
4065 | OPTAB_LIB_WIDEN); | |
55c2d311 | 4066 | if (remainder) |
c8dbc8ca | 4067 | return gen_lowpart (mode, remainder); |
55c2d311 TG |
4068 | } |
4069 | quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4070 | pre_shift, tquotient, 1); |
55c2d311 | 4071 | } |
34f016ed | 4072 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 | 4073 | { |
dc1d6150 | 4074 | if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1))) |
55c2d311 | 4075 | { |
dc1d6150 TG |
4076 | /* Most significant bit of divisor is set; emit an scc |
4077 | insn. */ | |
b45f0e58 PB |
4078 | quotient = emit_store_flag_force (tquotient, GEU, op0, op1, |
4079 | compute_mode, 1, 1); | |
55c2d311 TG |
4080 | } |
4081 | else | |
4082 | { | |
dc1d6150 TG |
4083 | /* Find a suitable multiplier and right shift count |
4084 | instead of multiplying with D. */ | |
4085 | ||
4086 | mh = choose_multiplier (d, size, size, | |
4087 | &ml, &post_shift, &dummy); | |
4088 | ||
4089 | /* If the suggested multiplier is more than SIZE bits, | |
4090 | we can do better for even divisors, using an | |
4091 | initial right shift. */ | |
4092 | if (mh != 0 && (d & 1) == 0) | |
4093 | { | |
4094 | pre_shift = floor_log2 (d & -d); | |
4095 | mh = choose_multiplier (d >> pre_shift, size, | |
4096 | size - pre_shift, | |
4097 | &ml, &post_shift, &dummy); | |
5b0264cb | 4098 | gcc_assert (!mh); |
dc1d6150 TG |
4099 | } |
4100 | else | |
4101 | pre_shift = 0; | |
4102 | ||
4103 | if (mh != 0) | |
4104 | { | |
4105 | rtx t1, t2, t3, t4; | |
4106 | ||
02a65aef R |
4107 | if (post_shift - 1 >= BITS_PER_WORD) |
4108 | goto fail1; | |
4109 | ||
965703ed | 4110 | extra_cost |
5322d07e NF |
4111 | = (shift_cost (speed, compute_mode, post_shift - 1) |
4112 | + shift_cost (speed, compute_mode, 1) | |
4113 | + 2 * add_cost (speed, compute_mode)); | |
2f1cd2eb RS |
4114 | t1 = expmed_mult_highpart |
4115 | (compute_mode, op0, | |
4116 | gen_int_mode (ml, compute_mode), | |
4117 | NULL_RTX, 1, max_cost - extra_cost); | |
dc1d6150 TG |
4118 | if (t1 == 0) |
4119 | goto fail1; | |
38a448ca RH |
4120 | t2 = force_operand (gen_rtx_MINUS (compute_mode, |
4121 | op0, t1), | |
dc1d6150 | 4122 | NULL_RTX); |
eb6c3df1 RG |
4123 | t3 = expand_shift (RSHIFT_EXPR, compute_mode, |
4124 | t2, 1, NULL_RTX, 1); | |
38a448ca RH |
4125 | t4 = force_operand (gen_rtx_PLUS (compute_mode, |
4126 | t1, t3), | |
dc1d6150 | 4127 | NULL_RTX); |
4a90aeeb NS |
4128 | quotient = expand_shift |
4129 | (RSHIFT_EXPR, compute_mode, t4, | |
eb6c3df1 | 4130 | post_shift - 1, tquotient, 1); |
dc1d6150 TG |
4131 | } |
4132 | else | |
4133 | { | |
4134 | rtx t1, t2; | |
4135 | ||
02a65aef R |
4136 | if (pre_shift >= BITS_PER_WORD |
4137 | || post_shift >= BITS_PER_WORD) | |
4138 | goto fail1; | |
4139 | ||
4a90aeeb NS |
4140 | t1 = expand_shift |
4141 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4142 | pre_shift, NULL_RTX, 1); |
965703ed | 4143 | extra_cost |
5322d07e NF |
4144 | = (shift_cost (speed, compute_mode, pre_shift) |
4145 | + shift_cost (speed, compute_mode, post_shift)); | |
2f1cd2eb RS |
4146 | t2 = expmed_mult_highpart |
4147 | (compute_mode, t1, | |
4148 | gen_int_mode (ml, compute_mode), | |
4149 | NULL_RTX, 1, max_cost - extra_cost); | |
dc1d6150 TG |
4150 | if (t2 == 0) |
4151 | goto fail1; | |
4a90aeeb NS |
4152 | quotient = expand_shift |
4153 | (RSHIFT_EXPR, compute_mode, t2, | |
eb6c3df1 | 4154 | post_shift, tquotient, 1); |
dc1d6150 | 4155 | } |
55c2d311 TG |
4156 | } |
4157 | } | |
34f016ed TG |
4158 | else /* Too wide mode to use tricky code */ |
4159 | break; | |
55c2d311 TG |
4160 | |
4161 | insn = get_last_insn (); | |
7543f918 JR |
4162 | if (insn != last) |
4163 | set_dst_reg_note (insn, REG_EQUAL, | |
4164 | gen_rtx_UDIV (compute_mode, op0, op1), | |
4165 | quotient); | |
55c2d311 TG |
4166 | } |
4167 | else /* TRUNC_DIV, signed */ | |
4168 | { | |
4169 | unsigned HOST_WIDE_INT ml; | |
4170 | int lgup, post_shift; | |
e71c0aa7 | 4171 | rtx mlr; |
55c2d311 | 4172 | HOST_WIDE_INT d = INTVAL (op1); |
e4c9f3c2 ILT |
4173 | unsigned HOST_WIDE_INT abs_d; |
4174 | ||
093253be ILT |
4175 | /* Since d might be INT_MIN, we have to cast to |
4176 | unsigned HOST_WIDE_INT before negating to avoid | |
4177 | undefined signed overflow. */ | |
6d9c91e9 ILT |
4178 | abs_d = (d >= 0 |
4179 | ? (unsigned HOST_WIDE_INT) d | |
4180 | : - (unsigned HOST_WIDE_INT) d); | |
55c2d311 TG |
4181 | |
4182 | /* n rem d = n rem -d */ | |
4183 | if (rem_flag && d < 0) | |
4184 | { | |
4185 | d = abs_d; | |
2496c7bd | 4186 | op1 = gen_int_mode (abs_d, compute_mode); |
55c2d311 TG |
4187 | } |
4188 | ||
4189 | if (d == 1) | |
4190 | quotient = op0; | |
4191 | else if (d == -1) | |
4192 | quotient = expand_unop (compute_mode, neg_optab, op0, | |
4193 | tquotient, 0); | |
f6c1336c ILT |
4194 | else if (HOST_BITS_PER_WIDE_INT >= size |
4195 | && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1)) | |
f737b132 RK |
4196 | { |
4197 | /* This case is not handled correctly below. */ | |
4198 | quotient = emit_store_flag (tquotient, EQ, op0, op1, | |
4199 | compute_mode, 1, 1); | |
4200 | if (quotient == 0) | |
4201 | goto fail1; | |
4202 | } | |
55c2d311 | 4203 | else if (EXACT_POWER_OF_2_OR_ZERO_P (d) |
5322d07e NF |
4204 | && (rem_flag |
4205 | ? smod_pow2_cheap (speed, compute_mode) | |
4206 | : sdiv_pow2_cheap (speed, compute_mode)) | |
0b55e932 RS |
4207 | /* We assume that cheap metric is true if the |
4208 | optab has an expander for this mode. */ | |
166cdb08 JH |
4209 | && ((optab_handler ((rem_flag ? smod_optab |
4210 | : sdiv_optab), | |
947131ba | 4211 | compute_mode) |
a8c7e72d | 4212 | != CODE_FOR_nothing) |
947131ba RS |
4213 | || (optab_handler (sdivmod_optab, |
4214 | compute_mode) | |
4215 | != CODE_FOR_nothing))) | |
55c2d311 TG |
4216 | ; |
4217 | else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) | |
4218 | { | |
0b55e932 RS |
4219 | if (rem_flag) |
4220 | { | |
4221 | remainder = expand_smod_pow2 (compute_mode, op0, d); | |
4222 | if (remainder) | |
4223 | return gen_lowpart (mode, remainder); | |
4224 | } | |
3d520aaf | 4225 | |
5322d07e | 4226 | if (sdiv_pow2_cheap (speed, compute_mode) |
947131ba | 4227 | && ((optab_handler (sdiv_optab, compute_mode) |
3d520aaf | 4228 | != CODE_FOR_nothing) |
947131ba | 4229 | || (optab_handler (sdivmod_optab, compute_mode) |
3d520aaf DE |
4230 | != CODE_FOR_nothing))) |
4231 | quotient = expand_divmod (0, TRUNC_DIV_EXPR, | |
4232 | compute_mode, op0, | |
4233 | gen_int_mode (abs_d, | |
4234 | compute_mode), | |
4235 | NULL_RTX, 0); | |
4236 | else | |
4237 | quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d); | |
55c2d311 | 4238 | |
0b55e932 RS |
4239 | /* We have computed OP0 / abs(OP1). If OP1 is negative, |
4240 | negate the quotient. */ | |
55c2d311 TG |
4241 | if (d < 0) |
4242 | { | |
4243 | insn = get_last_insn (); | |
4e430df8 | 4244 | if (insn != last |
c8e7fe58 DE |
4245 | && abs_d < ((unsigned HOST_WIDE_INT) 1 |
4246 | << (HOST_BITS_PER_WIDE_INT - 1))) | |
7543f918 JR |
4247 | set_dst_reg_note (insn, REG_EQUAL, |
4248 | gen_rtx_DIV (compute_mode, op0, | |
6d26322f JR |
4249 | gen_int_mode |
4250 | (abs_d, | |
4251 | compute_mode)), | |
7543f918 | 4252 | quotient); |
55c2d311 TG |
4253 | |
4254 | quotient = expand_unop (compute_mode, neg_optab, | |
4255 | quotient, quotient, 0); | |
4256 | } | |
4257 | } | |
34f016ed | 4258 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 TG |
4259 | { |
4260 | choose_multiplier (abs_d, size, size - 1, | |
079c527f | 4261 | &ml, &post_shift, &lgup); |
55c2d311 TG |
4262 | if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1)) |
4263 | { | |
4264 | rtx t1, t2, t3; | |
4265 | ||
02a65aef R |
4266 | if (post_shift >= BITS_PER_WORD |
4267 | || size - 1 >= BITS_PER_WORD) | |
4268 | goto fail1; | |
4269 | ||
5322d07e NF |
4270 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4271 | + shift_cost (speed, compute_mode, size - 1) | |
4272 | + add_cost (speed, compute_mode)); | |
2f1cd2eb RS |
4273 | t1 = expmed_mult_highpart |
4274 | (compute_mode, op0, gen_int_mode (ml, compute_mode), | |
4275 | NULL_RTX, 0, max_cost - extra_cost); | |
55c2d311 TG |
4276 | if (t1 == 0) |
4277 | goto fail1; | |
4a90aeeb NS |
4278 | t2 = expand_shift |
4279 | (RSHIFT_EXPR, compute_mode, t1, | |
eb6c3df1 | 4280 | post_shift, NULL_RTX, 0); |
4a90aeeb NS |
4281 | t3 = expand_shift |
4282 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4283 | size - 1, NULL_RTX, 0); |
55c2d311 | 4284 | if (d < 0) |
c5c76735 JL |
4285 | quotient |
4286 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4287 | t3, t2), | |
4288 | tquotient); | |
55c2d311 | 4289 | else |
c5c76735 JL |
4290 | quotient |
4291 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4292 | t2, t3), | |
4293 | tquotient); | |
55c2d311 TG |
4294 | } |
4295 | else | |
4296 | { | |
4297 | rtx t1, t2, t3, t4; | |
4298 | ||
02a65aef R |
4299 | if (post_shift >= BITS_PER_WORD |
4300 | || size - 1 >= BITS_PER_WORD) | |
4301 | goto fail1; | |
4302 | ||
55c2d311 | 4303 | ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); |
e71c0aa7 | 4304 | mlr = gen_int_mode (ml, compute_mode); |
5322d07e NF |
4305 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4306 | + shift_cost (speed, compute_mode, size - 1) | |
4307 | + 2 * add_cost (speed, compute_mode)); | |
00f07b86 | 4308 | t1 = expmed_mult_highpart (compute_mode, op0, mlr, |
71af73bb TG |
4309 | NULL_RTX, 0, |
4310 | max_cost - extra_cost); | |
55c2d311 TG |
4311 | if (t1 == 0) |
4312 | goto fail1; | |
c5c76735 JL |
4313 | t2 = force_operand (gen_rtx_PLUS (compute_mode, |
4314 | t1, op0), | |
55c2d311 | 4315 | NULL_RTX); |
4a90aeeb NS |
4316 | t3 = expand_shift |
4317 | (RSHIFT_EXPR, compute_mode, t2, | |
eb6c3df1 | 4318 | post_shift, NULL_RTX, 0); |
4a90aeeb NS |
4319 | t4 = expand_shift |
4320 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4321 | size - 1, NULL_RTX, 0); |
55c2d311 | 4322 | if (d < 0) |
c5c76735 JL |
4323 | quotient |
4324 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4325 | t4, t3), | |
4326 | tquotient); | |
55c2d311 | 4327 | else |
c5c76735 JL |
4328 | quotient |
4329 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4330 | t3, t4), | |
4331 | tquotient); | |
55c2d311 TG |
4332 | } |
4333 | } | |
34f016ed TG |
4334 | else /* Too wide mode to use tricky code */ |
4335 | break; | |
55c2d311 | 4336 | |
4e430df8 | 4337 | insn = get_last_insn (); |
7543f918 JR |
4338 | if (insn != last) |
4339 | set_dst_reg_note (insn, REG_EQUAL, | |
4340 | gen_rtx_DIV (compute_mode, op0, op1), | |
4341 | quotient); | |
55c2d311 TG |
4342 | } |
4343 | break; | |
4344 | } | |
4345 | fail1: | |
4346 | delete_insns_since (last); | |
4347 | break; | |
44037a66 | 4348 | |
55c2d311 TG |
4349 | case FLOOR_DIV_EXPR: |
4350 | case FLOOR_MOD_EXPR: | |
4351 | /* We will come here only for signed operations. */ | |
4352 | if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) | |
4353 | { | |
079c527f | 4354 | unsigned HOST_WIDE_INT mh, ml; |
55c2d311 TG |
4355 | int pre_shift, lgup, post_shift; |
4356 | HOST_WIDE_INT d = INTVAL (op1); | |
4357 | ||
4358 | if (d > 0) | |
4359 | { | |
4360 | /* We could just as easily deal with negative constants here, | |
4361 | but it does not seem worth the trouble for GCC 2.6. */ | |
4362 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4363 | { | |
4364 | pre_shift = floor_log2 (d); | |
4365 | if (rem_flag) | |
4366 | { | |
2f1cd2eb RS |
4367 | unsigned HOST_WIDE_INT mask |
4368 | = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1; | |
4369 | remainder = expand_binop | |
4370 | (compute_mode, and_optab, op0, | |
4371 | gen_int_mode (mask, compute_mode), | |
4372 | remainder, 0, OPTAB_LIB_WIDEN); | |
55c2d311 | 4373 | if (remainder) |
c8dbc8ca | 4374 | return gen_lowpart (mode, remainder); |
55c2d311 | 4375 | } |
4a90aeeb NS |
4376 | quotient = expand_shift |
4377 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4378 | pre_shift, tquotient, 0); |
55c2d311 TG |
4379 | } |
4380 | else | |
4381 | { | |
4382 | rtx t1, t2, t3, t4; | |
4383 | ||
4384 | mh = choose_multiplier (d, size, size - 1, | |
4385 | &ml, &post_shift, &lgup); | |
5b0264cb | 4386 | gcc_assert (!mh); |
55c2d311 | 4387 | |
02a65aef R |
4388 | if (post_shift < BITS_PER_WORD |
4389 | && size - 1 < BITS_PER_WORD) | |
55c2d311 | 4390 | { |
4a90aeeb NS |
4391 | t1 = expand_shift |
4392 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4393 | size - 1, NULL_RTX, 0); |
02a65aef R |
4394 | t2 = expand_binop (compute_mode, xor_optab, op0, t1, |
4395 | NULL_RTX, 0, OPTAB_WIDEN); | |
5322d07e NF |
4396 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4397 | + shift_cost (speed, compute_mode, size - 1) | |
4398 | + 2 * add_cost (speed, compute_mode)); | |
2f1cd2eb RS |
4399 | t3 = expmed_mult_highpart |
4400 | (compute_mode, t2, gen_int_mode (ml, compute_mode), | |
4401 | NULL_RTX, 1, max_cost - extra_cost); | |
02a65aef R |
4402 | if (t3 != 0) |
4403 | { | |
4a90aeeb NS |
4404 | t4 = expand_shift |
4405 | (RSHIFT_EXPR, compute_mode, t3, | |
eb6c3df1 | 4406 | post_shift, NULL_RTX, 1); |
02a65aef R |
4407 | quotient = expand_binop (compute_mode, xor_optab, |
4408 | t4, t1, tquotient, 0, | |
4409 | OPTAB_WIDEN); | |
4410 | } | |
55c2d311 TG |
4411 | } |
4412 | } | |
4413 | } | |
4414 | else | |
4415 | { | |
4416 | rtx nsign, t1, t2, t3, t4; | |
38a448ca RH |
4417 | t1 = force_operand (gen_rtx_PLUS (compute_mode, |
4418 | op0, constm1_rtx), NULL_RTX); | |
55c2d311 TG |
4419 | t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX, |
4420 | 0, OPTAB_WIDEN); | |
4a90aeeb NS |
4421 | nsign = expand_shift |
4422 | (RSHIFT_EXPR, compute_mode, t2, | |
eb6c3df1 | 4423 | size - 1, NULL_RTX, 0); |
38a448ca | 4424 | t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign), |
55c2d311 TG |
4425 | NULL_RTX); |
4426 | t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1, | |
4427 | NULL_RTX, 0); | |
4428 | if (t4) | |
4429 | { | |
4430 | rtx t5; | |
4431 | t5 = expand_unop (compute_mode, one_cmpl_optab, nsign, | |
4432 | NULL_RTX, 0); | |
38a448ca RH |
4433 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4434 | t4, t5), | |
55c2d311 TG |
4435 | tquotient); |
4436 | } | |
4437 | } | |
4438 | } | |
4439 | ||
4440 | if (quotient != 0) | |
4441 | break; | |
4442 | delete_insns_since (last); | |
4443 | ||
4444 | /* Try using an instruction that produces both the quotient and | |
4445 | remainder, using truncation. We can easily compensate the quotient | |
4446 | or remainder to get floor rounding, once we have the remainder. | |
4447 | Notice that we compute also the final remainder value here, | |
4448 | and return the result right away. */ | |
a45cf58c | 4449 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4450 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4451 | |
55c2d311 TG |
4452 | if (rem_flag) |
4453 | { | |
668443c9 | 4454 | remainder |
f8cfc6aa | 4455 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4456 | quotient = gen_reg_rtx (compute_mode); |
4457 | } | |
4458 | else | |
4459 | { | |
668443c9 | 4460 | quotient |
f8cfc6aa | 4461 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4462 | remainder = gen_reg_rtx (compute_mode); |
4463 | } | |
4464 | ||
4465 | if (expand_twoval_binop (sdivmod_optab, op0, op1, | |
4466 | quotient, remainder, 0)) | |
4467 | { | |
4468 | /* This could be computed with a branch-less sequence. | |
4469 | Save that for later. */ | |
4470 | rtx tem; | |
f3f6fb16 | 4471 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 | 4472 | do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); |
55c2d311 TG |
4473 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4474 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4475 | do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); |
55c2d311 TG |
4476 | expand_dec (quotient, const1_rtx); |
4477 | expand_inc (remainder, op1); | |
4478 | emit_label (label); | |
c8dbc8ca | 4479 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4480 | } |
4481 | ||
4482 | /* No luck with division elimination or divmod. Have to do it | |
4483 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4484 | { |
f3f6fb16 | 4485 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
55c2d311 TG |
4486 | rtx adjusted_op0; |
4487 | rtx tem; | |
4488 | ||
4489 | quotient = gen_reg_rtx (compute_mode); | |
4490 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4491 | label1 = gen_label_rtx (); | |
4492 | label2 = gen_label_rtx (); | |
4493 | label3 = gen_label_rtx (); | |
4494 | label4 = gen_label_rtx (); | |
4495 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4496 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4497 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); | |
55c2d311 TG |
4498 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4499 | quotient, 0, OPTAB_LIB_WIDEN); | |
4500 | if (tem != quotient) | |
4501 | emit_move_insn (quotient, tem); | |
4502 | emit_jump_insn (gen_jump (label5)); | |
4503 | emit_barrier (); | |
4504 | emit_label (label1); | |
44037a66 | 4505 | expand_inc (adjusted_op0, const1_rtx); |
55c2d311 TG |
4506 | emit_jump_insn (gen_jump (label4)); |
4507 | emit_barrier (); | |
4508 | emit_label (label2); | |
f5963e61 | 4509 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); |
55c2d311 TG |
4510 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4511 | quotient, 0, OPTAB_LIB_WIDEN); | |
4512 | if (tem != quotient) | |
4513 | emit_move_insn (quotient, tem); | |
4514 | emit_jump_insn (gen_jump (label5)); | |
4515 | emit_barrier (); | |
4516 | emit_label (label3); | |
4517 | expand_dec (adjusted_op0, const1_rtx); | |
4518 | emit_label (label4); | |
4519 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4520 | quotient, 0, OPTAB_LIB_WIDEN); | |
4521 | if (tem != quotient) | |
4522 | emit_move_insn (quotient, tem); | |
4523 | expand_dec (quotient, const1_rtx); | |
4524 | emit_label (label5); | |
44037a66 | 4525 | } |
55c2d311 | 4526 | break; |
44037a66 | 4527 | |
55c2d311 TG |
4528 | case CEIL_DIV_EXPR: |
4529 | case CEIL_MOD_EXPR: | |
4530 | if (unsignedp) | |
4531 | { | |
9176af2f TG |
4532 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))) |
4533 | { | |
4534 | rtx t1, t2, t3; | |
4535 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4536 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4537 | floor_log2 (d), tquotient, 1); |
9176af2f | 4538 | t2 = expand_binop (compute_mode, and_optab, op0, |
2f1cd2eb | 4539 | gen_int_mode (d - 1, compute_mode), |
9176af2f TG |
4540 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4541 | t3 = gen_reg_rtx (compute_mode); | |
4542 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4543 | compute_mode, 1, 1); | |
412381d9 TG |
4544 | if (t3 == 0) |
4545 | { | |
f3f6fb16 | 4546 | rtx_code_label *lab; |
412381d9 | 4547 | lab = gen_label_rtx (); |
f5963e61 | 4548 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
412381d9 TG |
4549 | expand_inc (t1, const1_rtx); |
4550 | emit_label (lab); | |
4551 | quotient = t1; | |
4552 | } | |
4553 | else | |
38a448ca RH |
4554 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4555 | t1, t3), | |
412381d9 | 4556 | tquotient); |
9176af2f TG |
4557 | break; |
4558 | } | |
55c2d311 TG |
4559 | |
4560 | /* Try using an instruction that produces both the quotient and | |
4561 | remainder, using truncation. We can easily compensate the | |
4562 | quotient or remainder to get ceiling rounding, once we have the | |
4563 | remainder. Notice that we compute also the final remainder | |
4564 | value here, and return the result right away. */ | |
a45cf58c | 4565 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4566 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4567 | |
55c2d311 TG |
4568 | if (rem_flag) |
4569 | { | |
f8cfc6aa | 4570 | remainder = (REG_P (target) |
668443c9 | 4571 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4572 | quotient = gen_reg_rtx (compute_mode); |
4573 | } | |
4574 | else | |
4575 | { | |
f8cfc6aa | 4576 | quotient = (REG_P (target) |
668443c9 | 4577 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4578 | remainder = gen_reg_rtx (compute_mode); |
4579 | } | |
4580 | ||
4581 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, | |
4582 | remainder, 1)) | |
4583 | { | |
4584 | /* This could be computed with a branch-less sequence. | |
4585 | Save that for later. */ | |
f3f6fb16 | 4586 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 JL |
4587 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4588 | compute_mode, label); | |
55c2d311 TG |
4589 | expand_inc (quotient, const1_rtx); |
4590 | expand_dec (remainder, op1); | |
4591 | emit_label (label); | |
c8dbc8ca | 4592 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4593 | } |
4594 | ||
4595 | /* No luck with division elimination or divmod. Have to do it | |
4596 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4597 | { |
f3f6fb16 | 4598 | rtx_code_label *label1, *label2; |
55c2d311 TG |
4599 | rtx adjusted_op0, tem; |
4600 | ||
4601 | quotient = gen_reg_rtx (compute_mode); | |
4602 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4603 | label1 = gen_label_rtx (); | |
4604 | label2 = gen_label_rtx (); | |
f5963e61 JL |
4605 | do_cmp_and_jump (adjusted_op0, const0_rtx, NE, |
4606 | compute_mode, label1); | |
55c2d311 TG |
4607 | emit_move_insn (quotient, const0_rtx); |
4608 | emit_jump_insn (gen_jump (label2)); | |
4609 | emit_barrier (); | |
4610 | emit_label (label1); | |
4611 | expand_dec (adjusted_op0, const1_rtx); | |
4612 | tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, | |
4613 | quotient, 1, OPTAB_LIB_WIDEN); | |
4614 | if (tem != quotient) | |
4615 | emit_move_insn (quotient, tem); | |
4616 | expand_inc (quotient, const1_rtx); | |
4617 | emit_label (label2); | |
44037a66 | 4618 | } |
55c2d311 TG |
4619 | } |
4620 | else /* signed */ | |
4621 | { | |
73f27728 RK |
4622 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
4623 | && INTVAL (op1) >= 0) | |
4624 | { | |
4625 | /* This is extremely similar to the code for the unsigned case | |
4626 | above. For 2.7 we should merge these variants, but for | |
4627 | 2.6.1 I don't want to touch the code for unsigned since that | |
4628 | get used in C. The signed case will only be used by other | |
4629 | languages (Ada). */ | |
4630 | ||
4631 | rtx t1, t2, t3; | |
4632 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4633 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4634 | floor_log2 (d), tquotient, 0); |
73f27728 | 4635 | t2 = expand_binop (compute_mode, and_optab, op0, |
2f1cd2eb | 4636 | gen_int_mode (d - 1, compute_mode), |
73f27728 RK |
4637 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4638 | t3 = gen_reg_rtx (compute_mode); | |
4639 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4640 | compute_mode, 1, 1); | |
4641 | if (t3 == 0) | |
4642 | { | |
f3f6fb16 | 4643 | rtx_code_label *lab; |
73f27728 | 4644 | lab = gen_label_rtx (); |
f5963e61 | 4645 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
73f27728 RK |
4646 | expand_inc (t1, const1_rtx); |
4647 | emit_label (lab); | |
4648 | quotient = t1; | |
4649 | } | |
4650 | else | |
38a448ca RH |
4651 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4652 | t1, t3), | |
73f27728 RK |
4653 | tquotient); |
4654 | break; | |
4655 | } | |
4656 | ||
55c2d311 TG |
4657 | /* Try using an instruction that produces both the quotient and |
4658 | remainder, using truncation. We can easily compensate the | |
4659 | quotient or remainder to get ceiling rounding, once we have the | |
4660 | remainder. Notice that we compute also the final remainder | |
4661 | value here, and return the result right away. */ | |
a45cf58c | 4662 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 TG |
4663 | target = gen_reg_rtx (compute_mode); |
4664 | if (rem_flag) | |
4665 | { | |
f8cfc6aa | 4666 | remainder= (REG_P (target) |
668443c9 | 4667 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4668 | quotient = gen_reg_rtx (compute_mode); |
4669 | } | |
4670 | else | |
4671 | { | |
f8cfc6aa | 4672 | quotient = (REG_P (target) |
668443c9 | 4673 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4674 | remainder = gen_reg_rtx (compute_mode); |
4675 | } | |
4676 | ||
4677 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, | |
4678 | remainder, 0)) | |
4679 | { | |
4680 | /* This could be computed with a branch-less sequence. | |
4681 | Save that for later. */ | |
4682 | rtx tem; | |
f3f6fb16 | 4683 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 JL |
4684 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4685 | compute_mode, label); | |
55c2d311 TG |
4686 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4687 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4688 | do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); |
55c2d311 TG |
4689 | expand_inc (quotient, const1_rtx); |
4690 | expand_dec (remainder, op1); | |
4691 | emit_label (label); | |
c8dbc8ca | 4692 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4693 | } |
4694 | ||
4695 | /* No luck with division elimination or divmod. Have to do it | |
4696 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4697 | { |
f3f6fb16 | 4698 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
55c2d311 TG |
4699 | rtx adjusted_op0; |
4700 | rtx tem; | |
4701 | ||
4702 | quotient = gen_reg_rtx (compute_mode); | |
4703 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4704 | label1 = gen_label_rtx (); | |
4705 | label2 = gen_label_rtx (); | |
4706 | label3 = gen_label_rtx (); | |
4707 | label4 = gen_label_rtx (); | |
4708 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4709 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4710 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, | |
4711 | compute_mode, label1); | |
55c2d311 TG |
4712 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4713 | quotient, 0, OPTAB_LIB_WIDEN); | |
4714 | if (tem != quotient) | |
4715 | emit_move_insn (quotient, tem); | |
4716 | emit_jump_insn (gen_jump (label5)); | |
4717 | emit_barrier (); | |
4718 | emit_label (label1); | |
4719 | expand_dec (adjusted_op0, const1_rtx); | |
4720 | emit_jump_insn (gen_jump (label4)); | |
4721 | emit_barrier (); | |
4722 | emit_label (label2); | |
f5963e61 JL |
4723 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, |
4724 | compute_mode, label3); | |
55c2d311 TG |
4725 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4726 | quotient, 0, OPTAB_LIB_WIDEN); | |
4727 | if (tem != quotient) | |
4728 | emit_move_insn (quotient, tem); | |
4729 | emit_jump_insn (gen_jump (label5)); | |
4730 | emit_barrier (); | |
4731 | emit_label (label3); | |
4732 | expand_inc (adjusted_op0, const1_rtx); | |
4733 | emit_label (label4); | |
4734 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4735 | quotient, 0, OPTAB_LIB_WIDEN); | |
4736 | if (tem != quotient) | |
4737 | emit_move_insn (quotient, tem); | |
4738 | expand_inc (quotient, const1_rtx); | |
4739 | emit_label (label5); | |
44037a66 | 4740 | } |
55c2d311 TG |
4741 | } |
4742 | break; | |
bc1c7e93 | 4743 | |
55c2d311 TG |
4744 | case EXACT_DIV_EXPR: |
4745 | if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) | |
4746 | { | |
4747 | HOST_WIDE_INT d = INTVAL (op1); | |
4748 | unsigned HOST_WIDE_INT ml; | |
91ce572a | 4749 | int pre_shift; |
55c2d311 TG |
4750 | rtx t1; |
4751 | ||
91ce572a CC |
4752 | pre_shift = floor_log2 (d & -d); |
4753 | ml = invert_mod2n (d >> pre_shift, size); | |
4754 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4755 | pre_shift, NULL_RTX, unsignedp); |
69107307 | 4756 | quotient = expand_mult (compute_mode, t1, |
2496c7bd | 4757 | gen_int_mode (ml, compute_mode), |
31ff3e0b | 4758 | NULL_RTX, 1); |
55c2d311 TG |
4759 | |
4760 | insn = get_last_insn (); | |
7543f918 JR |
4761 | set_dst_reg_note (insn, REG_EQUAL, |
4762 | gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, | |
4763 | compute_mode, op0, op1), | |
4764 | quotient); | |
55c2d311 TG |
4765 | } |
4766 | break; | |
4767 | ||
4768 | case ROUND_DIV_EXPR: | |
4769 | case ROUND_MOD_EXPR: | |
69f61901 RK |
4770 | if (unsignedp) |
4771 | { | |
4772 | rtx tem; | |
f3f6fb16 | 4773 | rtx_code_label *label; |
69f61901 RK |
4774 | label = gen_label_rtx (); |
4775 | quotient = gen_reg_rtx (compute_mode); | |
4776 | remainder = gen_reg_rtx (compute_mode); | |
4777 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) | |
4778 | { | |
4779 | rtx tem; | |
4780 | quotient = expand_binop (compute_mode, udiv_optab, op0, op1, | |
4781 | quotient, 1, OPTAB_LIB_WIDEN); | |
4782 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1); | |
4783 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4784 | remainder, 1, OPTAB_LIB_WIDEN); | |
4785 | } | |
0a81f074 | 4786 | tem = plus_constant (compute_mode, op1, -1); |
eb6c3df1 | 4787 | tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1); |
f5963e61 | 4788 | do_cmp_and_jump (remainder, tem, LEU, compute_mode, label); |
69f61901 RK |
4789 | expand_inc (quotient, const1_rtx); |
4790 | expand_dec (remainder, op1); | |
4791 | emit_label (label); | |
4792 | } | |
4793 | else | |
4794 | { | |
4795 | rtx abs_rem, abs_op1, tem, mask; | |
f3f6fb16 | 4796 | rtx_code_label *label; |
69f61901 RK |
4797 | label = gen_label_rtx (); |
4798 | quotient = gen_reg_rtx (compute_mode); | |
4799 | remainder = gen_reg_rtx (compute_mode); | |
4800 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) | |
4801 | { | |
4802 | rtx tem; | |
4803 | quotient = expand_binop (compute_mode, sdiv_optab, op0, op1, | |
4804 | quotient, 0, OPTAB_LIB_WIDEN); | |
4805 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0); | |
4806 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4807 | remainder, 0, OPTAB_LIB_WIDEN); | |
4808 | } | |
91ce572a CC |
4809 | abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0); |
4810 | abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0); | |
69f61901 | 4811 | tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem, |
eb6c3df1 | 4812 | 1, NULL_RTX, 1); |
f5963e61 | 4813 | do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label); |
69f61901 RK |
4814 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4815 | NULL_RTX, 0, OPTAB_WIDEN); | |
4816 | mask = expand_shift (RSHIFT_EXPR, compute_mode, tem, | |
eb6c3df1 | 4817 | size - 1, NULL_RTX, 0); |
69f61901 RK |
4818 | tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx, |
4819 | NULL_RTX, 0, OPTAB_WIDEN); | |
4820 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
4821 | NULL_RTX, 0, OPTAB_WIDEN); | |
4822 | expand_inc (quotient, tem); | |
4823 | tem = expand_binop (compute_mode, xor_optab, mask, op1, | |
4824 | NULL_RTX, 0, OPTAB_WIDEN); | |
4825 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
4826 | NULL_RTX, 0, OPTAB_WIDEN); | |
4827 | expand_dec (remainder, tem); | |
4828 | emit_label (label); | |
4829 | } | |
4830 | return gen_lowpart (mode, rem_flag ? remainder : quotient); | |
c410d49e | 4831 | |
e9a25f70 | 4832 | default: |
5b0264cb | 4833 | gcc_unreachable (); |
55c2d311 | 4834 | } |
44037a66 | 4835 | |
55c2d311 | 4836 | if (quotient == 0) |
44037a66 | 4837 | { |
a45cf58c RK |
4838 | if (target && GET_MODE (target) != compute_mode) |
4839 | target = 0; | |
4840 | ||
55c2d311 | 4841 | if (rem_flag) |
44037a66 | 4842 | { |
32fdf36b | 4843 | /* Try to produce the remainder without producing the quotient. |
d6a7951f | 4844 | If we seem to have a divmod pattern that does not require widening, |
b20b352b | 4845 | don't try widening here. We should really have a WIDEN argument |
32fdf36b TG |
4846 | to expand_twoval_binop, since what we'd really like to do here is |
4847 | 1) try a mod insn in compute_mode | |
4848 | 2) try a divmod insn in compute_mode | |
4849 | 3) try a div insn in compute_mode and multiply-subtract to get | |
4850 | remainder | |
4851 | 4) try the same things with widening allowed. */ | |
4852 | remainder | |
4853 | = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
4854 | op0, op1, target, | |
4855 | unsignedp, | |
947131ba | 4856 | ((optab_handler (optab2, compute_mode) |
32fdf36b TG |
4857 | != CODE_FOR_nothing) |
4858 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
55c2d311 | 4859 | if (remainder == 0) |
44037a66 TG |
4860 | { |
4861 | /* No luck there. Can we do remainder and divide at once | |
4862 | without a library call? */ | |
55c2d311 TG |
4863 | remainder = gen_reg_rtx (compute_mode); |
4864 | if (! expand_twoval_binop ((unsignedp | |
4865 | ? udivmod_optab | |
4866 | : sdivmod_optab), | |
4867 | op0, op1, | |
4868 | NULL_RTX, remainder, unsignedp)) | |
4869 | remainder = 0; | |
44037a66 | 4870 | } |
55c2d311 TG |
4871 | |
4872 | if (remainder) | |
4873 | return gen_lowpart (mode, remainder); | |
44037a66 | 4874 | } |
44037a66 | 4875 | |
dc38b292 RK |
4876 | /* Produce the quotient. Try a quotient insn, but not a library call. |
4877 | If we have a divmod in this mode, use it in preference to widening | |
4878 | the div (for this test we assume it will not fail). Note that optab2 | |
4879 | is set to the one of the two optabs that the call below will use. */ | |
4880 | quotient | |
4881 | = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, | |
4882 | op0, op1, rem_flag ? NULL_RTX : target, | |
4883 | unsignedp, | |
947131ba | 4884 | ((optab_handler (optab2, compute_mode) |
dc38b292 RK |
4885 | != CODE_FOR_nothing) |
4886 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
4887 | ||
55c2d311 | 4888 | if (quotient == 0) |
44037a66 TG |
4889 | { |
4890 | /* No luck there. Try a quotient-and-remainder insn, | |
4891 | keeping the quotient alone. */ | |
55c2d311 | 4892 | quotient = gen_reg_rtx (compute_mode); |
44037a66 | 4893 | if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, |
55c2d311 TG |
4894 | op0, op1, |
4895 | quotient, NULL_RTX, unsignedp)) | |
4896 | { | |
4897 | quotient = 0; | |
4898 | if (! rem_flag) | |
4899 | /* Still no luck. If we are not computing the remainder, | |
4900 | use a library call for the quotient. */ | |
4901 | quotient = sign_expand_binop (compute_mode, | |
4902 | udiv_optab, sdiv_optab, | |
4903 | op0, op1, target, | |
4904 | unsignedp, OPTAB_LIB_WIDEN); | |
4905 | } | |
44037a66 | 4906 | } |
44037a66 TG |
4907 | } |
4908 | ||
44037a66 TG |
4909 | if (rem_flag) |
4910 | { | |
a45cf58c RK |
4911 | if (target && GET_MODE (target) != compute_mode) |
4912 | target = 0; | |
4913 | ||
55c2d311 | 4914 | if (quotient == 0) |
b3f8d95d MM |
4915 | { |
4916 | /* No divide instruction either. Use library for remainder. */ | |
4917 | remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
4918 | op0, op1, target, | |
4919 | unsignedp, OPTAB_LIB_WIDEN); | |
4920 | /* No remainder function. Try a quotient-and-remainder | |
4921 | function, keeping the remainder. */ | |
4922 | if (!remainder) | |
4923 | { | |
4924 | remainder = gen_reg_rtx (compute_mode); | |
b8698a0f | 4925 | if (!expand_twoval_binop_libfunc |
b3f8d95d MM |
4926 | (unsignedp ? udivmod_optab : sdivmod_optab, |
4927 | op0, op1, | |
4928 | NULL_RTX, remainder, | |
4929 | unsignedp ? UMOD : MOD)) | |
4930 | remainder = NULL_RTX; | |
4931 | } | |
4932 | } | |
44037a66 TG |
4933 | else |
4934 | { | |
4935 | /* We divided. Now finish doing X - Y * (X / Y). */ | |
55c2d311 TG |
4936 | remainder = expand_mult (compute_mode, quotient, op1, |
4937 | NULL_RTX, unsignedp); | |
4938 | remainder = expand_binop (compute_mode, sub_optab, op0, | |
4939 | remainder, target, unsignedp, | |
4940 | OPTAB_LIB_WIDEN); | |
44037a66 TG |
4941 | } |
4942 | } | |
4943 | ||
55c2d311 | 4944 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
44037a66 TG |
4945 | } |
4946 | \f | |
4947 | /* Return a tree node with data type TYPE, describing the value of X. | |
4dfa0342 | 4948 | Usually this is an VAR_DECL, if there is no obvious better choice. |
44037a66 | 4949 | X may be an expression, however we only support those expressions |
6d2f8887 | 4950 | generated by loop.c. */ |
44037a66 TG |
4951 | |
4952 | tree | |
502b8322 | 4953 | make_tree (tree type, rtx x) |
44037a66 TG |
4954 | { |
4955 | tree t; | |
4956 | ||
4957 | switch (GET_CODE (x)) | |
4958 | { | |
4959 | case CONST_INT: | |
807e902e KZ |
4960 | case CONST_WIDE_INT: |
4961 | t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type))); | |
4962 | return t; | |
b8698a0f | 4963 | |
44037a66 | 4964 | case CONST_DOUBLE: |
807e902e KZ |
4965 | STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT); |
4966 | if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode) | |
4967 | t = wide_int_to_tree (type, | |
4968 | wide_int::from_array (&CONST_DOUBLE_LOW (x), 2, | |
4969 | HOST_BITS_PER_WIDE_INT * 2)); | |
44037a66 TG |
4970 | else |
4971 | { | |
4972 | REAL_VALUE_TYPE d; | |
4973 | ||
4974 | REAL_VALUE_FROM_CONST_DOUBLE (d, x); | |
4975 | t = build_real (type, d); | |
4976 | } | |
4977 | ||
4978 | return t; | |
69ef87e2 AH |
4979 | |
4980 | case CONST_VECTOR: | |
4981 | { | |
b8b7f162 RS |
4982 | int units = CONST_VECTOR_NUNITS (x); |
4983 | tree itype = TREE_TYPE (type); | |
d2a12ae7 | 4984 | tree *elts; |
b8b7f162 | 4985 | int i; |
69ef87e2 | 4986 | |
69ef87e2 | 4987 | /* Build a tree with vector elements. */ |
d2a12ae7 | 4988 | elts = XALLOCAVEC (tree, units); |
69ef87e2 AH |
4989 | for (i = units - 1; i >= 0; --i) |
4990 | { | |
b8b7f162 | 4991 | rtx elt = CONST_VECTOR_ELT (x, i); |
d2a12ae7 | 4992 | elts[i] = make_tree (itype, elt); |
69ef87e2 | 4993 | } |
c410d49e | 4994 | |
d2a12ae7 | 4995 | return build_vector (type, elts); |
69ef87e2 AH |
4996 | } |
4997 | ||
44037a66 | 4998 | case PLUS: |
4845b383 KH |
4999 | return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5000 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5001 | |
44037a66 | 5002 | case MINUS: |
4845b383 KH |
5003 | return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5004 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5005 | |
44037a66 | 5006 | case NEG: |
4845b383 | 5007 | return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); |
44037a66 TG |
5008 | |
5009 | case MULT: | |
4845b383 KH |
5010 | return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5011 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5012 | |
44037a66 | 5013 | case ASHIFT: |
4845b383 KH |
5014 | return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5015 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5016 | |
44037a66 | 5017 | case LSHIFTRT: |
ca5ba2a3 | 5018 | t = unsigned_type_for (type); |
aeba6c28 JM |
5019 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5020 | make_tree (t, XEXP (x, 0)), | |
5021 | make_tree (type, XEXP (x, 1)))); | |
c410d49e | 5022 | |
44037a66 | 5023 | case ASHIFTRT: |
12753674 | 5024 | t = signed_type_for (type); |
aeba6c28 JM |
5025 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5026 | make_tree (t, XEXP (x, 0)), | |
5027 | make_tree (type, XEXP (x, 1)))); | |
c410d49e | 5028 | |
44037a66 TG |
5029 | case DIV: |
5030 | if (TREE_CODE (type) != REAL_TYPE) | |
12753674 | 5031 | t = signed_type_for (type); |
44037a66 TG |
5032 | else |
5033 | t = type; | |
5034 | ||
aeba6c28 JM |
5035 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5036 | make_tree (t, XEXP (x, 0)), | |
5037 | make_tree (t, XEXP (x, 1)))); | |
44037a66 | 5038 | case UDIV: |
ca5ba2a3 | 5039 | t = unsigned_type_for (type); |
aeba6c28 JM |
5040 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5041 | make_tree (t, XEXP (x, 0)), | |
5042 | make_tree (t, XEXP (x, 1)))); | |
5c45425b RH |
5043 | |
5044 | case SIGN_EXTEND: | |
5045 | case ZERO_EXTEND: | |
ae2bcd98 RS |
5046 | t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), |
5047 | GET_CODE (x) == ZERO_EXTEND); | |
aeba6c28 | 5048 | return fold_convert (type, make_tree (t, XEXP (x, 0))); |
5c45425b | 5049 | |
84816907 JM |
5050 | case CONST: |
5051 | return make_tree (type, XEXP (x, 0)); | |
5052 | ||
5053 | case SYMBOL_REF: | |
5054 | t = SYMBOL_REF_DECL (x); | |
5055 | if (t) | |
5056 | return fold_convert (type, build_fold_addr_expr (t)); | |
5057 | /* else fall through. */ | |
5058 | ||
4dfa0342 | 5059 | default: |
c2255bc4 | 5060 | t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); |
d1608933 | 5061 | |
d4ebfa65 BE |
5062 | /* If TYPE is a POINTER_TYPE, we might need to convert X from |
5063 | address mode to pointer mode. */ | |
5ae6cd0d | 5064 | if (POINTER_TYPE_P (type)) |
d4ebfa65 BE |
5065 | x = convert_memory_address_addr_space |
5066 | (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); | |
d1608933 | 5067 | |
8a0aa06e RH |
5068 | /* Note that we do *not* use SET_DECL_RTL here, because we do not |
5069 | want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ | |
820cc88f | 5070 | t->decl_with_rtl.rtl = x; |
4dfa0342 | 5071 | |
44037a66 TG |
5072 | return t; |
5073 | } | |
5074 | } | |
44037a66 TG |
5075 | \f |
5076 | /* Compute the logical-and of OP0 and OP1, storing it in TARGET | |
5077 | and returning TARGET. | |
5078 | ||
5079 | If TARGET is 0, a pseudo-register or constant is returned. */ | |
5080 | ||
5081 | rtx | |
ef4bddc2 | 5082 | expand_and (machine_mode mode, rtx op0, rtx op1, rtx target) |
44037a66 | 5083 | { |
22273300 | 5084 | rtx tem = 0; |
44037a66 | 5085 | |
22273300 JJ |
5086 | if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) |
5087 | tem = simplify_binary_operation (AND, mode, op0, op1); | |
5088 | if (tem == 0) | |
44037a66 | 5089 | tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); |
44037a66 TG |
5090 | |
5091 | if (target == 0) | |
5092 | target = tem; | |
5093 | else if (tem != target) | |
5094 | emit_move_insn (target, tem); | |
5095 | return target; | |
5096 | } | |
495499da | 5097 | |
/* Helper function for emit_store_flag: emit one cstore insn.

   ICODE is the cstore insn code to use and CODE the rtx comparison
   code.  X and Y are the operands, nominally in MODE; COMPARE_MODE is
   the mode the comparison is actually performed in.  UNSIGNEDP is
   nonzero for an unsigned comparison.  NORMALIZEP has the same meaning
   as for emit_store_flag.  TARGET_MODE is the mode the caller wants
   the result in (VOIDmode means: use the insn's natural result mode).

   Returns the rtx holding the flag value, or NULL_RTX on failure, in
   which case every insn emitted here is deleted again.  */
static rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     machine_mode mode, machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  machine_mode result_mode = targetm.cstore_mode (icode);

  /* Remember where we are so we can roll back if the expansion fails.  */
  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    target_mode = result_mode;
  if (!target)
    target = gen_reg_rtx (target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  /* When optimizing, let the insn pick its own output pseudo (better
     for CSE); otherwise aim straight for TARGET.  */
  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
		    val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    /* Result is in the sign bit: an arithmetic (normalizep == -1) or
       logical (normalizep == 1) right shift brings it to bit 0.  */
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      /* Only the low bit of STORE_FLAG_VALUE is guaranteed here; mask
	 it out and negate for the 0/-1 encoding.  */
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5195 | ||
44037a66 | 5196 | |
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.

   Arguments mirror emit_store_flag: compare OP0 with OP1 using CODE in
   MODE, storing the flag value in TARGET (or a new pseudo) in
   TARGET_MODE.  UNSIGNEDP and NORMALIZEP are as for emit_store_flag.
   Returns the result rtx, or 0 if none of the strategies applied.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   machine_mode mode, int unsignedp, int normalizep,
		   machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;
  rtx tem;

  if (unsignedp)
    code = unsigned_condition (code);
  /* SCODE is CODE with its operands swapped; used below for the
     reversed-operand float retry.  */
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      tem = op0;
      op0 = op1;
      op1 = tem;
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  (x == 0 iff lo|hi == 0; x == -1 iff lo&hi == -1.)  */
	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, mode,
				      subreg_highpart_offset (word_mode,
							      mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  /* Sign- or zero-extend depending on whether the word_mode
	     flag value can have its sign bit set.  */
	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
    {
      subtarget = target;

      if (!target)
	target_mode = mode;

      /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
	{
	  op0 = convert_modes (target_mode, mode, op0, 0);
	  mode = target_mode;
	}

      if (target_mode != mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
			    GET_MODE_BITSIZE (mode) - 1,
			    subtarget, normalizep != -1);

      if (mode != target_mode)
	op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }

  /* Last resort: walk through MODE and every wider mode looking for a
     cstore pattern the target actually implements.  */
  mclass = GET_MODE_CLASS (mode);
  for (compare_mode = mode; compare_mode != VOIDmode;
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
    {
      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  tem = emit_cstore (target, icode, code, mode, compare_mode,
			     unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  /* For floats, also try the swapped-operand form of the
	     comparison before giving up on this insn.  */
	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}
5388 | ||
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  Normalize is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
		 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  /* First try the non-recursive tricks.  */
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
			   target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
	normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
	;
      else
	return 0;
    }

  /* Rollback point: each failed strategy below deletes back to here.  */
  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
	       && (target_mode == mode)) ? target : NULL_RTX;
  /* TRUEVAL is the value the flag takes when the comparison holds.  */
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
	  && (code == ORDERED || code == UNORDERED
	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
	{
	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));

	  /* For the reverse comparison, use either an addition or a XOR.
	     Only take the branch whose fixup constant is free (rtx_cost
	     == 0) on this target.  */
	  if (want_add
	      && rtx_cost (GEN_INT (normalizep), PLUS, 1,
			   optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	      if (tem)
		return expand_binop (target_mode, add_optab, tem,
				     gen_int_mode (normalizep, target_mode),
				     target, 0, OPTAB_WIDEN);
	    }
	  else if (!want_add
		   && rtx_cost (trueval, XOR, 1,
				optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	      if (tem)
		return expand_binop (target_mode, xor_optab, tem, trueval,
				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
	    }
	}

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
	return 0;

      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
	 Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
	{
	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
				    target_mode);
	}

#ifdef HAVE_conditional_move
      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
	 conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
			       normalizep, target_mode);
      if (tem == 0)
	return 0;

      if (and_them)
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     tem, const0_rtx, GET_MODE (tem), 0);
      else
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
	delete_insns_since (last);
      return tem;
#else
      return 0;
#endif
    }

  /* The remaining tricks only apply to integer comparisons.  */

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return 0;

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
			  OPTAB_WIDEN);

      if (tem == 0)
	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
			    OPTAB_WIDEN);
      if (tem != 0)
	tem = emit_store_flag (target, code, tem, const0_rtx,
			       mode, unsignedp, normalizep);
      if (tem != 0)
	return tem;

      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
	    && code == NE
	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	    && op1 == const0_rtx))
    {
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
	  && rtx_cost (GEN_INT (normalizep), PLUS, 1,
		       optimize_insn_for_speed_p ()) == 0)
	{
	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				   STORE_FLAG_VALUE, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, add_optab, tem,
				gen_int_mode (normalizep, target_mode),
				target, 0, OPTAB_WIDEN);
	}
      else if (!want_add
	       && rtx_cost (trueval, XOR, 1,
			    optimize_insn_for_speed_p ()) == 0)
	{
	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				   normalizep, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
				INTVAL (trueval) >= 0, OPTAB_WIDEN);
	}

      if (tem != 0)
	return tem;
      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
	  && (BRANCH_COST (optimize_insn_for_speed_p (),
			   false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      if (tem)
	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			    OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
			  GET_MODE_BITSIZE (mode) - 1,
			  subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			  OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
	 that converts the operand into a positive number if it is nonzero
	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
	 for NE we negate.  This puts the result in the sign bit.  Then we
	 normalize with a shift, if needed.

	 Two operations that can do the above actions are ABS and FFS, so try
	 them.  If that doesn't work, and MODE is smaller than a full word,
	 we can use zero-extension to the wider mode (an unsigned conversion)
	 as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	{
	  tem = convert_modes (word_mode, mode, op0, 1);
	  mode = word_mode;
	}

      if (tem != 0)
	{
	  if (code == EQ)
	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
				0, OPTAB_WIDEN);
	  else
	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
	}

      /* If we couldn't do it that way, for NE we can "or" the two's complement
	 of the value with itself.  For EQ, we take the one's complement of
	 that "or", which is an extra insn, so we only handle EQ if branches
	 are expensive.  */

      if (tem == 0
	  && (code == NE
	      || BRANCH_COST (optimize_insn_for_speed_p (),
			      false) > 1))
	{
	  if (rtx_equal_p (subtarget, op0))
	    subtarget = 0;

	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
			      OPTAB_WIDEN);

	  if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }

  /* The tricks above left the answer in the sign bit; shift it down to
     bit 0 (logical shift for 0/1, arithmetic for 0/-1).  */
  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
			GET_MODE_BITSIZE (mode) - 1,
			subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
	;
      else if (GET_MODE (tem) != target_mode)
	{
	  convert_move (target, tem, 0);
	  tem = target;
	}
      else if (!subtarget)
	{
	  emit_move_insn (target, tem);
	  tem = target;
	}
    }
  else
    delete_insns_since (last);

  return tem;
}
/* Like emit_store_flag, but always succeeds.

   If emit_store_flag fails, fall back to an explicit
   set/compare/jump/set sequence: load the "true" value into TARGET,
   then branch around a load of the "false" value.  Arguments are as
   for emit_store_flag.  Always returns the rtx holding the result.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
		       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      /* TARGET already holds OP0: skip the store when it is zero.  */
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
			       mode, NULL_RTX, NULL_RTX, label, -1);
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  /* TARGET must not overlap the operands, since it is written before
     the comparison below; use a fresh pseudo otherwise.  */
  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
	  || code == ORDERED || code == UNORDERED
	  || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	  || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
	rcode = reverse_condition_maybe_unordered (code);
      else
	rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
	  || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
	{
	  /* Branch on the reversed condition, so swap the two values.  */
	  falseval = trueval;
	  trueval = const0_rtx;
	  code = rcode;
	}
    }

  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
			   NULL_RTX, label, -1);

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}
f5963e61 JL |
5808 | \f |
5809 | /* Perform possibly multi-word comparison and conditional jump to LABEL | |
feb04780 RS |
5810 | if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is |
5811 | now a thin wrapper around do_compare_rtx_and_jump. */ | |
f5963e61 JL |
5812 | |
5813 | static void | |
ef4bddc2 | 5814 | do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode, |
f3f6fb16 | 5815 | rtx_code_label *label) |
f5963e61 | 5816 | { |
feb04780 RS |
5817 | int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); |
5818 | do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, | |
40e90eac | 5819 | NULL_RTX, NULL_RTX, label, -1); |
f5963e61 | 5820 | } |