]>
Commit | Line | Data |
---|---|---|
44037a66 TG |
1 | /* Medium-level subroutines: convert bit-field store and extract |
2 | and shifts, multiplies and divides to rtl instructions. | |
5624e564 | 3 | Copyright (C) 1987-2015 Free Software Foundation, Inc. |
44037a66 | 4 | |
1322177d | 5 | This file is part of GCC. |
44037a66 | 6 | |
1322177d LB |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 9 | Software Foundation; either version 3, or (at your option) any later |
1322177d | 10 | version. |
44037a66 | 11 | |
1322177d LB |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
44037a66 TG |
16 | |
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
44037a66 TG |
20 | |
21 | ||
22 | #include "config.h" | |
670ee920 | 23 | #include "system.h" |
4977bab6 | 24 | #include "coretypes.h" |
c7131fb2 AM |
25 | #include "backend.h" |
26 | #include "tree.h" | |
44037a66 | 27 | #include "rtl.h" |
c7131fb2 AM |
28 | #include "df.h" |
29 | #include "diagnostic-core.h" | |
40e23961 | 30 | #include "alias.h" |
40e23961 | 31 | #include "fold-const.h" |
d8a2d370 | 32 | #include "stor-layout.h" |
6baf1cc8 | 33 | #include "tm_p.h" |
44037a66 | 34 | #include "flags.h" |
44037a66 | 35 | #include "insn-config.h" |
36566b39 PK |
36 | #include "expmed.h" |
37 | #include "dojump.h" | |
38 | #include "explow.h" | |
39 | #include "calls.h" | |
40 | #include "emit-rtl.h" | |
41 | #include "varasm.h" | |
42 | #include "stmt.h" | |
44037a66 | 43 | #include "expr.h" |
b0710fe1 | 44 | #include "insn-codes.h" |
e78d8e51 | 45 | #include "optabs.h" |
44037a66 | 46 | #include "recog.h" |
b0c48229 | 47 | #include "langhooks.h" |
0890b981 | 48 | #include "target.h" |
462f85ce RS |
49 | |
50 | struct target_expmed default_target_expmed; | |
51 | #if SWITCHABLE_TARGET | |
52 | struct target_expmed *this_target_expmed = &default_target_expmed; | |
53 | #endif | |
44037a66 | 54 | |
502b8322 AJ |
55 | static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, |
56 | unsigned HOST_WIDE_INT, | |
1169e45d AH |
57 | unsigned HOST_WIDE_INT, |
58 | unsigned HOST_WIDE_INT, | |
1169e45d | 59 | rtx); |
ebb99f96 BE |
60 | static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT, |
61 | unsigned HOST_WIDE_INT, | |
62 | rtx); | |
502b8322 | 63 | static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT, |
1169e45d AH |
64 | unsigned HOST_WIDE_INT, |
65 | unsigned HOST_WIDE_INT, | |
66 | unsigned HOST_WIDE_INT, | |
67 | rtx); | |
ef4bddc2 | 68 | static rtx extract_fixed_bit_field (machine_mode, rtx, |
502b8322 | 69 | unsigned HOST_WIDE_INT, |
c6285bd7 | 70 | unsigned HOST_WIDE_INT, rtx, int); |
ef4bddc2 | 71 | static rtx extract_fixed_bit_field_1 (machine_mode, rtx, |
6f4e9cf8 BE |
72 | unsigned HOST_WIDE_INT, |
73 | unsigned HOST_WIDE_INT, rtx, int); | |
ef4bddc2 | 74 | static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int); |
502b8322 AJ |
75 | static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT, |
76 | unsigned HOST_WIDE_INT, int); | |
ef4bddc2 RS |
77 | static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *); |
78 | static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT); | |
79 | static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT); | |
44037a66 | 80 | |
807e902e KZ |
81 | /* Return a constant integer mask value of mode MODE with BITSIZE ones |
82 | followed by BITPOS zeros, or the complement of that if COMPLEMENT. | |
83 | The mask is truncated if necessary to the width of mode MODE. The | |
84 | mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */ | |
85 | ||
86 | static inline rtx | |
ef4bddc2 | 87 | mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement) |
807e902e KZ |
88 | { |
89 | return immed_wide_int_const | |
90 | (wi::shifted_mask (bitpos, bitsize, complement, | |
91 | GET_MODE_PRECISION (mode)), mode); | |
92 | } | |
93 | ||
58b42e19 | 94 | /* Test whether a value is zero of a power of two. */ |
be63b77d JJ |
95 | #define EXACT_POWER_OF_2_OR_ZERO_P(x) \ |
96 | (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0) | |
58b42e19 | 97 | |
84ddb681 | 98 | struct init_expmed_rtl |
44037a66 | 99 | { |
c83cf304 JJ |
100 | rtx reg; |
101 | rtx plus; | |
102 | rtx neg; | |
103 | rtx mult; | |
104 | rtx sdiv; | |
105 | rtx udiv; | |
106 | rtx sdiv_32; | |
107 | rtx smod_32; | |
108 | rtx wide_mult; | |
109 | rtx wide_lshr; | |
110 | rtx wide_trunc; | |
111 | rtx shift; | |
112 | rtx shift_mult; | |
113 | rtx shift_add; | |
114 | rtx shift_sub0; | |
115 | rtx shift_sub1; | |
116 | rtx zext; | |
117 | rtx trunc; | |
79b4a8dc | 118 | |
965703ed RS |
119 | rtx pow2[MAX_BITS_PER_WORD]; |
120 | rtx cint[MAX_BITS_PER_WORD]; | |
84ddb681 RH |
121 | }; |
122 | ||
91f8035e | 123 | static void |
ef4bddc2 RS |
124 | init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode, |
125 | machine_mode from_mode, bool speed) | |
91f8035e RH |
126 | { |
127 | int to_size, from_size; | |
128 | rtx which; | |
129 | ||
50b6ee8b DD |
130 | to_size = GET_MODE_PRECISION (to_mode); |
131 | from_size = GET_MODE_PRECISION (from_mode); | |
132 | ||
133 | /* Most partial integers have a precision less than the "full" | |
134 | integer it requires for storage. In case one doesn't, for | |
135 | comparison purposes here, reduce the bit size by one in that | |
136 | case. */ | |
137 | if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT | |
138 | && exact_log2 (to_size) != -1) | |
139 | to_size --; | |
140 | if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT | |
141 | && exact_log2 (from_size) != -1) | |
142 | from_size --; | |
91f8035e RH |
143 | |
144 | /* Assume cost of zero-extend and sign-extend is the same. */ | |
c83cf304 | 145 | which = (to_size < from_size ? all->trunc : all->zext); |
91f8035e | 146 | |
c83cf304 | 147 | PUT_MODE (all->reg, from_mode); |
91f8035e RH |
148 | set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed)); |
149 | } | |
150 | ||
84ddb681 RH |
151 | static void |
152 | init_expmed_one_mode (struct init_expmed_rtl *all, | |
ef4bddc2 | 153 | machine_mode mode, int speed) |
84ddb681 RH |
154 | { |
155 | int m, n, mode_bitsize; | |
ef4bddc2 | 156 | machine_mode mode_from; |
44037a66 | 157 | |
84ddb681 | 158 | mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); |
38a448ca | 159 | |
c83cf304 JJ |
160 | PUT_MODE (all->reg, mode); |
161 | PUT_MODE (all->plus, mode); | |
162 | PUT_MODE (all->neg, mode); | |
163 | PUT_MODE (all->mult, mode); | |
164 | PUT_MODE (all->sdiv, mode); | |
165 | PUT_MODE (all->udiv, mode); | |
166 | PUT_MODE (all->sdiv_32, mode); | |
167 | PUT_MODE (all->smod_32, mode); | |
168 | PUT_MODE (all->wide_trunc, mode); | |
169 | PUT_MODE (all->shift, mode); | |
170 | PUT_MODE (all->shift_mult, mode); | |
171 | PUT_MODE (all->shift_add, mode); | |
172 | PUT_MODE (all->shift_sub0, mode); | |
173 | PUT_MODE (all->shift_sub1, mode); | |
174 | PUT_MODE (all->zext, mode); | |
175 | PUT_MODE (all->trunc, mode); | |
176 | ||
177 | set_add_cost (speed, mode, set_src_cost (all->plus, speed)); | |
178 | set_neg_cost (speed, mode, set_src_cost (all->neg, speed)); | |
179 | set_mul_cost (speed, mode, set_src_cost (all->mult, speed)); | |
180 | set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed)); | |
181 | set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed)); | |
182 | ||
183 | set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed) | |
5322d07e | 184 | <= 2 * add_cost (speed, mode))); |
c83cf304 | 185 | set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed) |
5322d07e NF |
186 | <= 4 * add_cost (speed, mode))); |
187 | ||
188 | set_shift_cost (speed, mode, 0, 0); | |
189 | { | |
190 | int cost = add_cost (speed, mode); | |
191 | set_shiftadd_cost (speed, mode, 0, cost); | |
192 | set_shiftsub0_cost (speed, mode, 0, cost); | |
193 | set_shiftsub1_cost (speed, mode, 0, cost); | |
194 | } | |
84ddb681 RH |
195 | |
196 | n = MIN (MAX_BITS_PER_WORD, mode_bitsize); | |
197 | for (m = 1; m < n; m++) | |
198 | { | |
c83cf304 JJ |
199 | XEXP (all->shift, 1) = all->cint[m]; |
200 | XEXP (all->shift_mult, 1) = all->pow2[m]; | |
84ddb681 | 201 | |
c83cf304 JJ |
202 | set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed)); |
203 | set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed)); | |
204 | set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed)); | |
205 | set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed)); | |
84ddb681 RH |
206 | } |
207 | ||
208 | if (SCALAR_INT_MODE_P (mode)) | |
965703ed | 209 | { |
91f8035e | 210 | for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT; |
ef4bddc2 | 211 | mode_from = (machine_mode)(mode_from + 1)) |
91f8035e RH |
212 | init_expmed_one_conv (all, mode, mode_from, speed); |
213 | } | |
214 | if (GET_MODE_CLASS (mode) == MODE_INT) | |
215 | { | |
ef4bddc2 | 216 | machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); |
84ddb681 RH |
217 | if (wider_mode != VOIDmode) |
218 | { | |
c83cf304 JJ |
219 | PUT_MODE (all->zext, wider_mode); |
220 | PUT_MODE (all->wide_mult, wider_mode); | |
221 | PUT_MODE (all->wide_lshr, wider_mode); | |
222 | XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize); | |
84ddb681 | 223 | |
91f8035e | 224 | set_mul_widen_cost (speed, wider_mode, |
c83cf304 | 225 | set_src_cost (all->wide_mult, speed)); |
91f8035e | 226 | set_mul_highpart_cost (speed, mode, |
c83cf304 | 227 | set_src_cost (all->wide_trunc, speed)); |
84ddb681 | 228 | } |
965703ed | 229 | } |
84ddb681 RH |
230 | } |
231 | ||
232 | void | |
233 | init_expmed (void) | |
234 | { | |
235 | struct init_expmed_rtl all; | |
ef4bddc2 | 236 | machine_mode mode = QImode; |
84ddb681 RH |
237 | int m, speed; |
238 | ||
79b4a8dc | 239 | memset (&all, 0, sizeof all); |
84ddb681 RH |
240 | for (m = 1; m < MAX_BITS_PER_WORD; m++) |
241 | { | |
242 | all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m); | |
243 | all.cint[m] = GEN_INT (m); | |
244 | } | |
79b4a8dc | 245 | |
1d27fed4 | 246 | /* Avoid using hard regs in ways which may be unsupported. */ |
9fccb335 | 247 | all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); |
c83cf304 JJ |
248 | all.plus = gen_rtx_PLUS (mode, all.reg, all.reg); |
249 | all.neg = gen_rtx_NEG (mode, all.reg); | |
250 | all.mult = gen_rtx_MULT (mode, all.reg, all.reg); | |
251 | all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg); | |
252 | all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg); | |
253 | all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]); | |
254 | all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]); | |
255 | all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg); | |
256 | all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext); | |
257 | all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg); | |
258 | all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr); | |
259 | all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg); | |
260 | all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg); | |
261 | all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg); | |
262 | all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg); | |
263 | all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult); | |
264 | all.trunc = gen_rtx_TRUNCATE (mode, all.reg); | |
6dd8f4bb | 265 | |
f40751dd | 266 | for (speed = 0; speed < 2; speed++) |
71af73bb | 267 | { |
f40751dd | 268 | crtl->maybe_hot_insn_p = speed; |
5322d07e | 269 | set_zero_cost (speed, set_src_cost (const0_rtx, speed)); |
79b4a8dc | 270 | |
91f8035e | 271 | for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT; |
ef4bddc2 | 272 | mode = (machine_mode)(mode + 1)) |
84ddb681 | 273 | init_expmed_one_mode (&all, mode, speed); |
79b4a8dc | 274 | |
91f8035e RH |
275 | if (MIN_MODE_PARTIAL_INT != VOIDmode) |
276 | for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT; | |
ef4bddc2 | 277 | mode = (machine_mode)(mode + 1)) |
91f8035e RH |
278 | init_expmed_one_mode (&all, mode, speed); |
279 | ||
280 | if (MIN_MODE_VECTOR_INT != VOIDmode) | |
281 | for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT; | |
ef4bddc2 | 282 | mode = (machine_mode)(mode + 1)) |
91f8035e | 283 | init_expmed_one_mode (&all, mode, speed); |
79b4a8dc | 284 | } |
84ddb681 | 285 | |
5322d07e NF |
286 | if (alg_hash_used_p ()) |
287 | { | |
288 | struct alg_hash_entry *p = alg_hash_entry_ptr (0); | |
289 | memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES); | |
290 | } | |
c371bb73 | 291 | else |
5322d07e | 292 | set_alg_hash_used_p (true); |
f40751dd | 293 | default_rtl_profile (); |
c83cf304 JJ |
294 | |
295 | ggc_free (all.trunc); | |
296 | ggc_free (all.shift_sub1); | |
297 | ggc_free (all.shift_sub0); | |
298 | ggc_free (all.shift_add); | |
299 | ggc_free (all.shift_mult); | |
300 | ggc_free (all.shift); | |
301 | ggc_free (all.wide_trunc); | |
302 | ggc_free (all.wide_lshr); | |
303 | ggc_free (all.wide_mult); | |
304 | ggc_free (all.zext); | |
305 | ggc_free (all.smod_32); | |
306 | ggc_free (all.sdiv_32); | |
307 | ggc_free (all.udiv); | |
308 | ggc_free (all.sdiv); | |
309 | ggc_free (all.mult); | |
310 | ggc_free (all.neg); | |
311 | ggc_free (all.plus); | |
312 | ggc_free (all.reg); | |
44037a66 TG |
313 | } |
314 | ||
315 | /* Return an rtx representing minus the value of X. | |
316 | MODE is the intended mode of the result, | |
317 | useful if X is a CONST_INT. */ | |
318 | ||
319 | rtx | |
ef4bddc2 | 320 | negate_rtx (machine_mode mode, rtx x) |
44037a66 | 321 | { |
a39a7484 RK |
322 | rtx result = simplify_unary_operation (NEG, mode, x, mode); |
323 | ||
fdb5537f | 324 | if (result == 0) |
a39a7484 RK |
325 | result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |
326 | ||
327 | return result; | |
44037a66 | 328 | } |
da920570 | 329 | |
26f8b976 RS |
330 | /* Adjust bitfield memory MEM so that it points to the first unit of mode |
331 | MODE that contains a bitfield of size BITSIZE at bit position BITNUM. | |
332 | If MODE is BLKmode, return a reference to every byte in the bitfield. | |
333 | Set *NEW_BITNUM to the bit position of the field within the new memory. */ | |
334 | ||
335 | static rtx | |
ef4bddc2 | 336 | narrow_bit_field_mem (rtx mem, machine_mode mode, |
26f8b976 RS |
337 | unsigned HOST_WIDE_INT bitsize, |
338 | unsigned HOST_WIDE_INT bitnum, | |
339 | unsigned HOST_WIDE_INT *new_bitnum) | |
340 | { | |
341 | if (mode == BLKmode) | |
342 | { | |
343 | *new_bitnum = bitnum % BITS_PER_UNIT; | |
344 | HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT; | |
345 | HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1) | |
346 | / BITS_PER_UNIT); | |
347 | return adjust_bitfield_address_size (mem, mode, offset, size); | |
348 | } | |
349 | else | |
350 | { | |
351 | unsigned int unit = GET_MODE_BITSIZE (mode); | |
352 | *new_bitnum = bitnum % unit; | |
353 | HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT; | |
354 | return adjust_bitfield_address (mem, mode, offset); | |
355 | } | |
356 | } | |
357 | ||
fcdd52b7 RS |
358 | /* The caller wants to perform insertion or extraction PATTERN on a |
359 | bitfield of size BITSIZE at BITNUM bits into memory operand OP0. | |
360 | BITREGION_START and BITREGION_END are as for store_bit_field | |
361 | and FIELDMODE is the natural mode of the field. | |
362 | ||
363 | Search for a mode that is compatible with the memory access | |
364 | restrictions and (where applicable) with a register insertion or | |
365 | extraction. Return the new memory on success, storing the adjusted | |
366 | bit position in *NEW_BITNUM. Return null otherwise. */ | |
367 | ||
368 | static rtx | |
369 | adjust_bit_field_mem_for_reg (enum extraction_pattern pattern, | |
370 | rtx op0, HOST_WIDE_INT bitsize, | |
371 | HOST_WIDE_INT bitnum, | |
372 | unsigned HOST_WIDE_INT bitregion_start, | |
373 | unsigned HOST_WIDE_INT bitregion_end, | |
ef4bddc2 | 374 | machine_mode fieldmode, |
fcdd52b7 RS |
375 | unsigned HOST_WIDE_INT *new_bitnum) |
376 | { | |
377 | bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start, | |
378 | bitregion_end, MEM_ALIGN (op0), | |
379 | MEM_VOLATILE_P (op0)); | |
ef4bddc2 | 380 | machine_mode best_mode; |
fcdd52b7 RS |
381 | if (iter.next_mode (&best_mode)) |
382 | { | |
383 | /* We can use a memory in BEST_MODE. See whether this is true for | |
384 | any wider modes. All other things being equal, we prefer to | |
385 | use the widest mode possible because it tends to expose more | |
386 | CSE opportunities. */ | |
387 | if (!iter.prefer_smaller_modes ()) | |
388 | { | |
389 | /* Limit the search to the mode required by the corresponding | |
390 | register insertion or extraction instruction, if any. */ | |
ef4bddc2 | 391 | machine_mode limit_mode = word_mode; |
fcdd52b7 RS |
392 | extraction_insn insn; |
393 | if (get_best_reg_extraction_insn (&insn, pattern, | |
394 | GET_MODE_BITSIZE (best_mode), | |
395 | fieldmode)) | |
396 | limit_mode = insn.field_mode; | |
397 | ||
ef4bddc2 | 398 | machine_mode wider_mode; |
fcdd52b7 RS |
399 | while (iter.next_mode (&wider_mode) |
400 | && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode)) | |
401 | best_mode = wider_mode; | |
402 | } | |
403 | return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, | |
404 | new_bitnum); | |
405 | } | |
406 | return NULL_RTX; | |
407 | } | |
408 | ||
bebf0797 RS |
409 | /* Return true if a bitfield of size BITSIZE at bit number BITNUM within |
410 | a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg | |
411 | offset is then BITNUM / BITS_PER_UNIT. */ | |
412 | ||
413 | static bool | |
414 | lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum, | |
415 | unsigned HOST_WIDE_INT bitsize, | |
ef4bddc2 | 416 | machine_mode struct_mode) |
bebf0797 RS |
417 | { |
418 | if (BYTES_BIG_ENDIAN) | |
c1a4d0b5 | 419 | return (bitnum % BITS_PER_UNIT == 0 |
bebf0797 RS |
420 | && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode) |
421 | || (bitnum + bitsize) % BITS_PER_WORD == 0)); | |
422 | else | |
423 | return bitnum % BITS_PER_WORD == 0; | |
424 | } | |
00efe3ea | 425 | |
548cfdc2 | 426 | /* Return true if -fstrict-volatile-bitfields applies to an access of OP0 |
6f4e9cf8 BE |
427 | containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE. |
428 | Return false if the access would touch memory outside the range | |
429 | BITREGION_START to BITREGION_END for conformance to the C++ memory | |
430 | model. */ | |
f5d4f18c SL |
431 | |
432 | static bool | |
433 | strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, | |
434 | unsigned HOST_WIDE_INT bitnum, | |
ef4bddc2 | 435 | machine_mode fieldmode, |
6f4e9cf8 BE |
436 | unsigned HOST_WIDE_INT bitregion_start, |
437 | unsigned HOST_WIDE_INT bitregion_end) | |
f5d4f18c SL |
438 | { |
439 | unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode); | |
440 | ||
441 | /* -fstrict-volatile-bitfields must be enabled and we must have a | |
442 | volatile MEM. */ | |
443 | if (!MEM_P (op0) | |
444 | || !MEM_VOLATILE_P (op0) | |
445 | || flag_strict_volatile_bitfields <= 0) | |
446 | return false; | |
447 | ||
448 | /* Non-integral modes likely only happen with packed structures. | |
449 | Punt. */ | |
450 | if (!SCALAR_INT_MODE_P (fieldmode)) | |
451 | return false; | |
452 | ||
453 | /* The bit size must not be larger than the field mode, and | |
454 | the field mode must not be larger than a word. */ | |
455 | if (bitsize > modesize || modesize > BITS_PER_WORD) | |
456 | return false; | |
457 | ||
458 | /* Check for cases of unaligned fields that must be split. */ | |
b6dd42a9 BE |
459 | if (bitnum % modesize + bitsize > modesize) |
460 | return false; | |
461 | ||
462 | /* The memory must be sufficiently aligned for a MODESIZE access. | |
463 | This condition guarantees, that the memory access will not | |
464 | touch anything after the end of the structure. */ | |
465 | if (MEM_ALIGN (op0) < modesize) | |
f5d4f18c SL |
466 | return false; |
467 | ||
6f4e9cf8 BE |
468 | /* Check for cases where the C++ memory model applies. */ |
469 | if (bitregion_end != 0 | |
470 | && (bitnum - bitnum % modesize < bitregion_start | |
40f94f7d | 471 | || bitnum - bitnum % modesize + modesize - 1 > bitregion_end)) |
6f4e9cf8 BE |
472 | return false; |
473 | ||
f5d4f18c SL |
474 | return true; |
475 | } | |
476 | ||
00efe3ea RS |
477 | /* Return true if OP is a memory and if a bitfield of size BITSIZE at |
478 | bit number BITNUM can be treated as a simple value of mode MODE. */ | |
479 | ||
480 | static bool | |
481 | simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, | |
ef4bddc2 | 482 | unsigned HOST_WIDE_INT bitnum, machine_mode mode) |
00efe3ea RS |
483 | { |
484 | return (MEM_P (op0) | |
485 | && bitnum % BITS_PER_UNIT == 0 | |
486 | && bitsize == GET_MODE_BITSIZE (mode) | |
487 | && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) | |
488 | || (bitnum % GET_MODE_ALIGNMENT (mode) == 0 | |
489 | && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode)))); | |
490 | } | |
6d7db3c5 | 491 | \f |
fcdd52b7 RS |
492 | /* Try to use instruction INSV to store VALUE into a field of OP0. |
493 | BITSIZE and BITNUM are as for store_bit_field. */ | |
a20556e4 RS |
494 | |
495 | static bool | |
fcdd52b7 RS |
496 | store_bit_field_using_insv (const extraction_insn *insv, rtx op0, |
497 | unsigned HOST_WIDE_INT bitsize, | |
548cfdc2 EB |
498 | unsigned HOST_WIDE_INT bitnum, |
499 | rtx value) | |
a20556e4 RS |
500 | { |
501 | struct expand_operand ops[4]; | |
502 | rtx value1; | |
503 | rtx xop0 = op0; | |
f3f6fb16 | 504 | rtx_insn *last = get_last_insn (); |
a20556e4 RS |
505 | bool copy_back = false; |
506 | ||
ef4bddc2 | 507 | machine_mode op_mode = insv->field_mode; |
a20556e4 RS |
508 | unsigned int unit = GET_MODE_BITSIZE (op_mode); |
509 | if (bitsize == 0 || bitsize > unit) | |
510 | return false; | |
511 | ||
512 | if (MEM_P (xop0)) | |
26f8b976 | 513 | /* Get a reference to the first byte of the field. */ |
fcdd52b7 RS |
514 | xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum, |
515 | &bitnum); | |
a20556e4 RS |
516 | else |
517 | { | |
518 | /* Convert from counting within OP0 to counting in OP_MODE. */ | |
519 | if (BYTES_BIG_ENDIAN) | |
520 | bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0)); | |
521 | ||
522 | /* If xop0 is a register, we need it in OP_MODE | |
523 | to make it acceptable to the format of insv. */ | |
524 | if (GET_CODE (xop0) == SUBREG) | |
525 | /* We can't just change the mode, because this might clobber op0, | |
526 | and we will need the original value of op0 if insv fails. */ | |
527 | xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); | |
528 | if (REG_P (xop0) && GET_MODE (xop0) != op_mode) | |
529 | xop0 = gen_lowpart_SUBREG (op_mode, xop0); | |
530 | } | |
531 | ||
532 | /* If the destination is a paradoxical subreg such that we need a | |
533 | truncate to the inner mode, perform the insertion on a temporary and | |
534 | truncate the result to the original destination. Note that we can't | |
535 | just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N | |
536 | X) 0)) is (reg:N X). */ | |
537 | if (GET_CODE (xop0) == SUBREG | |
538 | && REG_P (SUBREG_REG (xop0)) | |
539 | && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)), | |
540 | op_mode)) | |
541 | { | |
542 | rtx tem = gen_reg_rtx (op_mode); | |
543 | emit_move_insn (tem, xop0); | |
544 | xop0 = tem; | |
545 | copy_back = true; | |
546 | } | |
547 | ||
4ae9783e JW |
548 | /* There are similar overflow check at the start of store_bit_field_1, |
549 | but that only check the situation where the field lies completely | |
550 | outside the register, while there do have situation where the field | |
551 | lies partialy in the register, we need to adjust bitsize for this | |
552 | partial overflow situation. Without this fix, pr48335-2.c on big-endian | |
553 | will broken on those arch support bit insert instruction, like arm, aarch64 | |
554 | etc. */ | |
555 | if (bitsize + bitnum > unit && bitnum < unit) | |
556 | { | |
e623cedf JW |
557 | warning (OPT_Wextra, "write of %wu-bit data outside the bound of " |
558 | "destination object, data truncated into %wu-bit", | |
559 | bitsize, unit - bitnum); | |
4ae9783e JW |
560 | bitsize = unit - bitnum; |
561 | } | |
562 | ||
a20556e4 RS |
563 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count |
564 | "backwards" from the size of the unit we are inserting into. | |
565 | Otherwise, we count bits from the most significant on a | |
566 | BYTES/BITS_BIG_ENDIAN machine. */ | |
567 | ||
568 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | |
569 | bitnum = unit - bitsize - bitnum; | |
570 | ||
571 | /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ | |
572 | value1 = value; | |
573 | if (GET_MODE (value) != op_mode) | |
574 | { | |
575 | if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize) | |
576 | { | |
577 | /* Optimization: Don't bother really extending VALUE | |
578 | if it has all the bits we will actually use. However, | |
579 | if we must narrow it, be sure we do it correctly. */ | |
580 | ||
581 | if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode)) | |
582 | { | |
583 | rtx tmp; | |
584 | ||
585 | tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0); | |
586 | if (! tmp) | |
587 | tmp = simplify_gen_subreg (op_mode, | |
588 | force_reg (GET_MODE (value), | |
589 | value1), | |
590 | GET_MODE (value), 0); | |
591 | value1 = tmp; | |
592 | } | |
593 | else | |
594 | value1 = gen_lowpart (op_mode, value1); | |
595 | } | |
596 | else if (CONST_INT_P (value)) | |
597 | value1 = gen_int_mode (INTVAL (value), op_mode); | |
598 | else | |
599 | /* Parse phase is supposed to make VALUE's data type | |
600 | match that of the component reference, which is a type | |
601 | at least as wide as the field; so VALUE should have | |
602 | a mode that corresponds to that type. */ | |
603 | gcc_assert (CONSTANT_P (value)); | |
604 | } | |
605 | ||
606 | create_fixed_operand (&ops[0], xop0); | |
607 | create_integer_operand (&ops[1], bitsize); | |
608 | create_integer_operand (&ops[2], bitnum); | |
609 | create_input_operand (&ops[3], value1, op_mode); | |
fcdd52b7 | 610 | if (maybe_expand_insn (insv->icode, 4, ops)) |
a20556e4 RS |
611 | { |
612 | if (copy_back) | |
613 | convert_move (op0, xop0, true); | |
614 | return true; | |
615 | } | |
616 | delete_insns_since (last); | |
617 | return false; | |
618 | } | |
619 | ||
6d7db3c5 RS |
620 | /* A subroutine of store_bit_field, with the same arguments. Return true |
621 | if the operation could be implemented. | |
44037a66 | 622 | |
6d7db3c5 RS |
623 | If FALLBACK_P is true, fall back to store_fixed_bit_field if we have |
624 | no other way of implementing the operation. If FALLBACK_P is false, | |
625 | return false instead. */ | |
626 | ||
627 | static bool | |
628 | store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
1169e45d AH |
629 | unsigned HOST_WIDE_INT bitnum, |
630 | unsigned HOST_WIDE_INT bitregion_start, | |
631 | unsigned HOST_WIDE_INT bitregion_end, | |
ef4bddc2 | 632 | machine_mode fieldmode, |
6d7db3c5 | 633 | rtx value, bool fallback_p) |
44037a66 | 634 | { |
b3694847 | 635 | rtx op0 = str_rtx; |
28526e20 | 636 | rtx orig_value; |
da920570 | 637 | |
44037a66 TG |
638 | while (GET_CODE (op0) == SUBREG) |
639 | { | |
640 | /* The following line once was done only if WORDS_BIG_ENDIAN, | |
641 | but I think that is a mistake. WORDS_BIG_ENDIAN is | |
642 | meaningful at a much higher level; when structures are copied | |
643 | between memory and regs, the higher-numbered regs | |
644 | always get higher addresses. */ | |
495db1a1 AK |
645 | int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))); |
646 | int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0)); | |
bebf0797 | 647 | int byte_offset = 0; |
495db1a1 AK |
648 | |
649 | /* Paradoxical subregs need special handling on big endian machines. */ | |
650 | if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size) | |
651 | { | |
652 | int difference = inner_mode_size - outer_mode_size; | |
653 | ||
654 | if (WORDS_BIG_ENDIAN) | |
655 | byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD; | |
656 | if (BYTES_BIG_ENDIAN) | |
657 | byte_offset += difference % UNITS_PER_WORD; | |
658 | } | |
659 | else | |
660 | byte_offset = SUBREG_BYTE (op0); | |
661 | ||
662 | bitnum += byte_offset * BITS_PER_UNIT; | |
44037a66 TG |
663 | op0 = SUBREG_REG (op0); |
664 | } | |
665 | ||
2c58f7dd RS |
666 | /* No action is needed if the target is a register and if the field |
667 | lies completely outside that register. This can occur if the source | |
668 | code contains an out-of-bounds access to a small array. */ | |
669 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
6d7db3c5 | 670 | return true; |
2c58f7dd | 671 | |
b42271d6 | 672 | /* Use vec_set patterns for inserting parts of vectors whenever |
997404de JH |
673 | available. */ |
674 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
3c0cb5de | 675 | && !MEM_P (op0) |
947131ba | 676 | && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing |
997404de JH |
677 | && fieldmode == GET_MODE_INNER (GET_MODE (op0)) |
678 | && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) | |
679 | && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) | |
680 | { | |
a5c7d693 | 681 | struct expand_operand ops[3]; |
ef4bddc2 RS |
682 | machine_mode outermode = GET_MODE (op0); |
683 | machine_mode innermode = GET_MODE_INNER (outermode); | |
a5c7d693 | 684 | enum insn_code icode = optab_handler (vec_set_optab, outermode); |
997404de | 685 | int pos = bitnum / GET_MODE_BITSIZE (innermode); |
997404de | 686 | |
a5c7d693 RS |
687 | create_fixed_operand (&ops[0], op0); |
688 | create_input_operand (&ops[1], value, innermode); | |
689 | create_integer_operand (&ops[2], pos); | |
690 | if (maybe_expand_insn (icode, 3, ops)) | |
691 | return true; | |
997404de JH |
692 | } |
693 | ||
308ecea0 | 694 | /* If the target is a register, overwriting the entire object, or storing |
bebf0797 RS |
695 | a full-word or multi-word field can be done with just a SUBREG. */ |
696 | if (!MEM_P (op0) | |
697 | && bitsize == GET_MODE_BITSIZE (fieldmode) | |
698 | && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0) | |
699 | || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0))) | |
700 | { | |
701 | /* Use the subreg machinery either to narrow OP0 to the required | |
d8c84975 JJ |
702 | words or to cope with mode punning between equal-sized modes. |
703 | In the latter case, use subreg on the rhs side, not lhs. */ | |
704 | rtx sub; | |
705 | ||
706 | if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) | |
707 | { | |
708 | sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0); | |
709 | if (sub) | |
710 | { | |
711 | emit_move_insn (op0, sub); | |
712 | return true; | |
713 | } | |
714 | } | |
715 | else | |
bebf0797 | 716 | { |
d8c84975 JJ |
717 | sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), |
718 | bitnum / BITS_PER_UNIT); | |
719 | if (sub) | |
720 | { | |
721 | emit_move_insn (sub, value); | |
722 | return true; | |
723 | } | |
bebf0797 RS |
724 | } |
725 | } | |
308ecea0 | 726 | |
bebf0797 | 727 | /* If the target is memory, storing any naturally aligned field can be |
308ecea0 | 728 | done with a simple store. For targets that support fast unaligned |
0b69c29f | 729 | memory, any naturally sized, unit aligned field can be done directly. */ |
00efe3ea | 730 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode)) |
44037a66 | 731 | { |
bebf0797 | 732 | op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT); |
44037a66 | 733 | emit_move_insn (op0, value); |
6d7db3c5 | 734 | return true; |
44037a66 TG |
735 | } |
736 | ||
a8ca7756 JW |
737 | /* Make sure we are playing with integral modes. Pun with subregs |
738 | if we aren't. This must come after the entire register case above, | |
739 | since that case is valid for any mode. The following cases are only | |
740 | valid for integral modes. */ | |
741 | { | |
ef4bddc2 | 742 | machine_mode imode = int_mode_for_mode (GET_MODE (op0)); |
a8ca7756 JW |
743 | if (imode != GET_MODE (op0)) |
744 | { | |
3c0cb5de | 745 | if (MEM_P (op0)) |
e98fc6de | 746 | op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0)); |
a8ca7756 | 747 | else |
5b0264cb NS |
748 | { |
749 | gcc_assert (imode != BLKmode); | |
750 | op0 = gen_lowpart (imode, op0); | |
751 | } | |
a8ca7756 JW |
752 | } |
753 | } | |
754 | ||
44037a66 | 755 | /* Storing an lsb-aligned field in a register |
bebf0797 | 756 | can be done with a movstrict instruction. */ |
44037a66 | 757 | |
3c0cb5de | 758 | if (!MEM_P (op0) |
bebf0797 | 759 | && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) |
44037a66 | 760 | && bitsize == GET_MODE_BITSIZE (fieldmode) |
947131ba | 761 | && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing) |
44037a66 | 762 | { |
a5c7d693 RS |
763 | struct expand_operand ops[2]; |
764 | enum insn_code icode = optab_handler (movstrict_optab, fieldmode); | |
5d560619 | 765 | rtx arg0 = op0; |
19228b93 | 766 | unsigned HOST_WIDE_INT subreg_off; |
5e4900c7 | 767 | |
a5c7d693 | 768 | if (GET_CODE (arg0) == SUBREG) |
44037a66 | 769 | { |
5b0264cb NS |
770 | /* Else we've got some float mode source being extracted into |
771 | a different float mode destination -- this combination of | |
772 | subregs results in Severe Tire Damage. */ | |
a5c7d693 | 773 | gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode |
5b0264cb NS |
774 | || GET_MODE_CLASS (fieldmode) == MODE_INT |
775 | || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); | |
a5c7d693 | 776 | arg0 = SUBREG_REG (arg0); |
5e4900c7 | 777 | } |
470032d7 | 778 | |
bebf0797 | 779 | subreg_off = bitnum / BITS_PER_UNIT; |
19228b93 JJ |
780 | if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)) |
781 | { | |
782 | arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); | |
a5c7d693 | 783 | |
19228b93 JJ |
784 | create_fixed_operand (&ops[0], arg0); |
785 | /* Shrink the source operand to FIELDMODE. */ | |
786 | create_convert_operand_to (&ops[1], value, fieldmode, false); | |
787 | if (maybe_expand_insn (icode, 2, ops)) | |
788 | return true; | |
789 | } | |
44037a66 TG |
790 | } |
791 | ||
792 | /* Handle fields bigger than a word. */ | |
793 | ||
794 | if (bitsize > BITS_PER_WORD) | |
795 | { | |
796 | /* Here we transfer the words of the field | |
797 | in the order least significant first. | |
798 | This is because the most significant word is the one which may | |
ad83e87b PB |
799 | be less than full. |
800 | However, only do that if the value is not BLKmode. */ | |
801 | ||
770ae6cc RK |
802 | unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; |
803 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; | |
804 | unsigned int i; | |
f3f6fb16 | 805 | rtx_insn *last; |
44037a66 TG |
806 | |
807 | /* This is the mode we must force value to, so that there will be enough | |
808 | subwords to extract. Note that fieldmode will often (always?) be | |
809 | VOIDmode, because that is what store_field uses to indicate that this | |
535a42b1 NS |
810 | is a bit field, but passing VOIDmode to operand_subword_force |
811 | is not allowed. */ | |
9f5e2e11 RS |
812 | fieldmode = GET_MODE (value); |
813 | if (fieldmode == VOIDmode) | |
6f83092f | 814 | fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); |
44037a66 | 815 | |
6d7db3c5 | 816 | last = get_last_insn (); |
44037a66 TG |
817 | for (i = 0; i < nwords; i++) |
818 | { | |
ad83e87b PB |
819 | /* If I is 0, use the low-order word in both field and target; |
820 | if I is 1, use the next to lowest word; and so on. */ | |
00d6b19a AB |
821 | unsigned int wordnum = (backwards |
822 | ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD | |
823 | - i - 1 | |
824 | : i); | |
770ae6cc | 825 | unsigned int bit_offset = (backwards |
04050c69 RK |
826 | ? MAX ((int) bitsize - ((int) i + 1) |
827 | * BITS_PER_WORD, | |
828 | 0) | |
829 | : (int) i * BITS_PER_WORD); | |
6d7db3c5 | 830 | rtx value_word = operand_subword_force (value, wordnum, fieldmode); |
3bdb97b8 AK |
831 | unsigned HOST_WIDE_INT new_bitsize = |
832 | MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD); | |
833 | ||
834 | /* If the remaining chunk doesn't have full wordsize we have | |
835 | to make sure that for big endian machines the higher order | |
836 | bits are used. */ | |
837 | if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards) | |
838 | value_word = simplify_expand_binop (word_mode, lshr_optab, | |
839 | value_word, | |
840 | GEN_INT (BITS_PER_WORD | |
841 | - new_bitsize), | |
842 | NULL_RTX, true, | |
843 | OPTAB_LIB_WIDEN); | |
844 | ||
845 | if (!store_bit_field_1 (op0, new_bitsize, | |
1169e45d AH |
846 | bitnum + bit_offset, |
847 | bitregion_start, bitregion_end, | |
848 | word_mode, | |
6d7db3c5 RS |
849 | value_word, fallback_p)) |
850 | { | |
851 | delete_insns_since (last); | |
852 | return false; | |
853 | } | |
44037a66 | 854 | } |
6d7db3c5 | 855 | return true; |
44037a66 TG |
856 | } |
857 | ||
4f1da2e9 RS |
858 | /* If VALUE has a floating-point or complex mode, access it as an |
859 | integer of the corresponding size. This can occur on a machine | |
860 | with 64 bit registers that uses SFmode for float. It can also | |
861 | occur for unaligned float or complex fields. */ | |
28526e20 | 862 | orig_value = value; |
4f1da2e9 RS |
863 | if (GET_MODE (value) != VOIDmode |
864 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT | |
32b069d3 | 865 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) |
4f1da2e9 RS |
866 | { |
867 | value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value))); | |
868 | emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); | |
869 | } | |
2305bcad | 870 | |
bebf0797 RS |
871 | /* If OP0 is a multi-word register, narrow it to the affected word. |
872 | If the region spans two words, defer to store_split_bit_field. */ | |
873 | if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
874 | { | |
875 | op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), | |
876 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); | |
877 | gcc_assert (op0); | |
878 | bitnum %= BITS_PER_WORD; | |
879 | if (bitnum + bitsize > BITS_PER_WORD) | |
880 | { | |
881 | if (!fallback_p) | |
882 | return false; | |
883 | ||
884 | store_split_bit_field (op0, bitsize, bitnum, bitregion_start, | |
885 | bitregion_end, value); | |
886 | return true; | |
887 | } | |
888 | } | |
889 | ||
890 | /* From here on we can assume that the field to be stored in fits | |
891 | within a word. If the destination is a register, it too fits | |
892 | in a word. */ | |
44037a66 | 893 | |
fcdd52b7 RS |
894 | extraction_insn insv; |
895 | if (!MEM_P (op0) | |
896 | && get_best_reg_extraction_insn (&insv, EP_insv, | |
897 | GET_MODE_BITSIZE (GET_MODE (op0)), | |
898 | fieldmode) | |
899 | && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value)) | |
a20556e4 | 900 | return true; |
6d7db3c5 RS |
901 | |
902 | /* If OP0 is a memory, try copying it to a register and seeing if a | |
903 | cheap register alternative is available. */ | |
fcdd52b7 | 904 | if (MEM_P (op0)) |
6d7db3c5 | 905 | { |
f5d4f18c SL |
906 | if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum, |
907 | fieldmode) | |
fcdd52b7 | 908 | && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value)) |
17a73ba0 RS |
909 | return true; |
910 | ||
f3f6fb16 | 911 | rtx_insn *last = get_last_insn (); |
6d7db3c5 | 912 | |
fcdd52b7 RS |
913 | /* Try loading part of OP0 into a register, inserting the bitfield |
914 | into that, and then copying the result back to OP0. */ | |
915 | unsigned HOST_WIDE_INT bitpos; | |
916 | rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum, | |
917 | bitregion_start, bitregion_end, | |
918 | fieldmode, &bitpos); | |
919 | if (xop0) | |
0fb7aeda | 920 | { |
fcdd52b7 | 921 | rtx tempreg = copy_to_reg (xop0); |
bebf0797 | 922 | if (store_bit_field_1 (tempreg, bitsize, bitpos, |
1169e45d | 923 | bitregion_start, bitregion_end, |
6d7db3c5 RS |
924 | fieldmode, orig_value, false)) |
925 | { | |
926 | emit_move_insn (xop0, tempreg); | |
927 | return true; | |
928 | } | |
44037a66 | 929 | delete_insns_since (last); |
44037a66 TG |
930 | } |
931 | } | |
6d7db3c5 RS |
932 | |
933 | if (!fallback_p) | |
934 | return false; | |
935 | ||
bebf0797 RS |
936 | store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start, |
937 | bitregion_end, value); | |
6d7db3c5 RS |
938 | return true; |
939 | } | |
940 | ||
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum,
		 unsigned HOST_WIDE_INT bitregion_start,
		 unsigned HOST_WIDE_INT bitregion_end,
		 machine_mode fieldmode,
		 rtx value)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.
     In that case the access must use exactly FIELDMODE, so we bypass
     the usual mode-selection machinery below.  */
  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
				  bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (fieldmode))
	{
	  str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
					     bitnum / BITS_PER_UNIT);
	  /* NOTE(review): this assert checks a precondition of the
	     address adjustment on the line above; it would arguably be
	     clearer placed before it.  */
	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
	  emit_move_insn (str_rtx, value);
	}
      else
	{
	  rtx temp;

	  /* Sub-word field: load the containing FIELDMODE word into a
	     register, insert the bits there, and store it back, so the
	     memory is touched with exactly one read and one write.  */
	  str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
					  &bitnum);
	  gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
	  temp = copy_to_reg (str_rtx);
	  if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
				  fieldmode, value, true))
	    gcc_unreachable ();

	  emit_move_insn (str_rtx, temp);
	}

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      machine_mode bestmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      /* Rebase BITNUM and the region bounds so that the region starts
	 at bit 0 of the adjusted address.  */
      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
				bitregion_start, bitregion_end,
				MEM_ALIGN (str_rtx), VOIDmode,
				MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, true))
    gcc_unreachable ();
}
1020 | \f | |
/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.

   BITREGION_START/BITREGION_END delimit the bytes this store may touch
   under the C++ memory model (0/0 when the model does not apply); they
   constrain the access mode chosen for a MEM.  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (MEM_P (op0))
    {
      machine_mode mode = GET_MODE (op0);
      /* A BLKmode or wider-than-word MEM cannot be accessed directly;
	 fall back to word_mode as the widest candidate.  */
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;
      mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
				 bitregion_end, value);
	  return;
	}

      /* Narrow the MEM to the chosen mode; BITNUM is rewritten to be
	 relative to the narrowed reference.  */
      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
}
1061 | ||
/* Helper function for store_fixed_bit_field, stores
   the bit field always using the MODE of OP0.

   Implements the store as OP0 = (OP0 & ~mask) | (VALUE << BITNUM),
   skipping the AND when VALUE is all-ones and the IOR when VALUE is
   zero.  MODE must be a scalar integer mode.  */

static void
store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value)
{
  machine_mode mode;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      /* Truncate V to the field width so the all-zero/all-one tests
	 below look only at the bits actually being stored.  */
      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == (unsigned HOST_WIDE_INT) -1))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      /* Non-constant VALUE: mask off excess high bits only when they
	 could overlap bits of OP0 outside the field.  */
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      /* mask_rtx with complement=1 yields ~(field mask).  */
      temp = expand_binop (mode, and_optab, temp,
			   mask_rtx (mode, bitnum, bitsize, 1),
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      /* Copy OP0 so the stored MEM/REG rtx is not shared with the
	 rtx used as a source above.  */
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
1157 | \f | |
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
    unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  /* Emit the field one UNIT-bounded piece at a time until all
     BITSIZE bits have been stored.  */
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
	  && !REG_P (op0)
	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    /* NOTE(review): if THISSIZE can reach HOST_BITS_PER_WIDE_INT,
	       the signed "(HOST_WIDE_INT) 1 << thissize" below is
	       undefined behavior on the host; later GCC uses an unsigned
	       one-constant here -- confirm THISSIZE's range.  */
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    {
	      int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
	      /* The args are chosen so that the last part includes the
		 lsb.  Give extract_bit_field the value it needs (with
		 endianness compensation) to fetch the piece we want.  */
	      part = extract_fixed_bit_field (word_mode, value, thissize,
					      total_bits - bitsize + bitsdone,
					      NULL_RTX, 1);
	    }
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    bitsdone, NULL_RTX, 1);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
			    + (offset * unit / BITS_PER_WORD);
	  machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
	    /* A sub-word inner register: any nonzero word offset is an
	       out-of-bounds access, represented as const0_rtx.  */
	    word = word_offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
					  GET_MODE (SUBREG_REG (op0)));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else if (REG_P (op0))
	{
	  machine_mode op0_mode = GET_MODE (op0);
	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
	    word = offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
					  GET_MODE (op0));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
	store_fixed_bit_field (word, thissize, offset * unit + thispos,
			       bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}
1305 | \f | |
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.

   Returns X unchanged when it is already in MODE or TMODE.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
			     machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      /* Force into a register so gen_lowpart can give us a SUBREG.  */
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
1332 | ||
/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXT_MODE is the mode of the extraction and the other arguments
   are as for extract_bit_field.

   EXTV describes the extraction instruction (icode, field mode and
   struct mode) selected by the caller.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  machine_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  /* The pattern cannot extract more bits than EXT_MODE holds.  */
  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
	return NULL_RTX;
      if (REG_P (op0) && GET_MODE (op0) != ext_mode)
	op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
	 between the mode of the extraction (word_mode) and the target
	 mode.  Instead, create a temporary and use convert_move to set
	 the target.  */
      if (REG_P (target)
	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
	{
	  target = gen_lowpart (ext_mode, target);
	  /* Remember the wider view of SPEC_TARGET so the success path
	     below can map it back to the caller-visible register.  */
	  if (GET_MODE_PRECISION (ext_mode)
	      > GET_MODE_PRECISION (GET_MODE (spec_target)))
	    spec_target_subreg = target;
	}
      else
	target = gen_reg_rtx (ext_mode);
    }

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
	return target;
      if (target == spec_target_subreg)
	return spec_target;
      /* Otherwise the pattern produced EXT_MODE; convert to the mode
	 the caller asked for.  */
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}
1416 | ||
6d7db3c5 RS |
1417 | /* A subroutine of extract_bit_field, with the same arguments. |
1418 | If FALLBACK_P is true, fall back to extract_fixed_bit_field | |
1419 | if we can find no other means of implementing the operation. | |
1420 | if FALLBACK_P is false, return NULL instead. */ | |
1421 | ||
1422 | static rtx | |
1423 | extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
c6285bd7 | 1424 | unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, |
ef4bddc2 | 1425 | machine_mode mode, machine_mode tmode, |
6d7db3c5 | 1426 | bool fallback_p) |
44037a66 | 1427 | { |
b3694847 | 1428 | rtx op0 = str_rtx; |
ef4bddc2 RS |
1429 | machine_mode int_mode; |
1430 | machine_mode mode1; | |
44037a66 | 1431 | |
44037a66 TG |
1432 | if (tmode == VOIDmode) |
1433 | tmode = mode; | |
6ca6193b | 1434 | |
44037a66 TG |
1435 | while (GET_CODE (op0) == SUBREG) |
1436 | { | |
2c58f7dd | 1437 | bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |
44037a66 TG |
1438 | op0 = SUBREG_REG (op0); |
1439 | } | |
77295dec | 1440 | |
2c58f7dd | 1441 | /* If we have an out-of-bounds access to a register, just return an |
647eea9d | 1442 | uninitialized register of the required mode. This can occur if the |
2c58f7dd RS |
1443 | source code contains an out-of-bounds access to a small array. */ |
1444 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
1445 | return gen_reg_rtx (tmode); | |
1446 | ||
f8cfc6aa | 1447 | if (REG_P (op0) |
aac280fb DD |
1448 | && mode == GET_MODE (op0) |
1449 | && bitnum == 0 | |
0b69c29f | 1450 | && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) |
aac280fb | 1451 | { |
0b69c29f | 1452 | /* We're trying to extract a full register from itself. */ |
aac280fb DD |
1453 | return op0; |
1454 | } | |
1455 | ||
0890b981 AP |
1456 | /* See if we can get a better vector mode before extracting. */ |
1457 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
1458 | && !MEM_P (op0) | |
1459 | && GET_MODE_INNER (GET_MODE (op0)) != tmode) | |
1460 | { | |
ef4bddc2 | 1461 | machine_mode new_mode; |
0890b981 AP |
1462 | |
1463 | if (GET_MODE_CLASS (tmode) == MODE_FLOAT) | |
1464 | new_mode = MIN_MODE_VECTOR_FLOAT; | |
325217ed CF |
1465 | else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |
1466 | new_mode = MIN_MODE_VECTOR_FRACT; | |
1467 | else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) | |
1468 | new_mode = MIN_MODE_VECTOR_UFRACT; | |
1469 | else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) | |
1470 | new_mode = MIN_MODE_VECTOR_ACCUM; | |
1471 | else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) | |
1472 | new_mode = MIN_MODE_VECTOR_UACCUM; | |
0890b981 AP |
1473 | else |
1474 | new_mode = MIN_MODE_VECTOR_INT; | |
1475 | ||
1476 | for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode)) | |
b147c5b9 | 1477 | if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) |
0890b981 AP |
1478 | && targetm.vector_mode_supported_p (new_mode)) |
1479 | break; | |
1480 | if (new_mode != VOIDmode) | |
1481 | op0 = gen_lowpart (new_mode, op0); | |
1482 | } | |
1483 | ||
997404de JH |
1484 | /* Use vec_extract patterns for extracting parts of vectors whenever |
1485 | available. */ | |
1486 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
3c0cb5de | 1487 | && !MEM_P (op0) |
947131ba | 1488 | && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing |
b42271d6 JB |
1489 | && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) |
1490 | == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) | |
997404de | 1491 | { |
a5c7d693 | 1492 | struct expand_operand ops[3]; |
ef4bddc2 RS |
1493 | machine_mode outermode = GET_MODE (op0); |
1494 | machine_mode innermode = GET_MODE_INNER (outermode); | |
a5c7d693 | 1495 | enum insn_code icode = optab_handler (vec_extract_optab, outermode); |
b42271d6 | 1496 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); |
997404de | 1497 | |
a5c7d693 RS |
1498 | create_output_operand (&ops[0], target, innermode); |
1499 | create_input_operand (&ops[1], op0, outermode); | |
1500 | create_integer_operand (&ops[2], pos); | |
1501 | if (maybe_expand_insn (icode, 3, ops)) | |
997404de | 1502 | { |
a5c7d693 RS |
1503 | target = ops[0].value; |
1504 | if (GET_MODE (target) != mode) | |
1505 | return gen_lowpart (tmode, target); | |
1506 | return target; | |
997404de JH |
1507 | } |
1508 | } | |
1509 | ||
d006aa54 RH |
1510 | /* Make sure we are playing with integral modes. Pun with subregs |
1511 | if we aren't. */ | |
1512 | { | |
ef4bddc2 | 1513 | machine_mode imode = int_mode_for_mode (GET_MODE (op0)); |
d006aa54 RH |
1514 | if (imode != GET_MODE (op0)) |
1515 | { | |
a6d2976a | 1516 | if (MEM_P (op0)) |
e98fc6de | 1517 | op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0)); |
7d293b58 | 1518 | else if (imode != BLKmode) |
a6d2976a | 1519 | { |
a6d2976a | 1520 | op0 = gen_lowpart (imode, op0); |
360e3535 | 1521 | |
a6d2976a JDA |
1522 | /* If we got a SUBREG, force it into a register since we |
1523 | aren't going to be able to do another SUBREG on it. */ | |
1524 | if (GET_CODE (op0) == SUBREG) | |
1525 | op0 = force_reg (imode, op0); | |
1526 | } | |
7d293b58 JJ |
1527 | else if (REG_P (op0)) |
1528 | { | |
1529 | rtx reg, subreg; | |
1530 | imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)), | |
1531 | MODE_INT); | |
1532 | reg = gen_reg_rtx (imode); | |
1533 | subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg); | |
1534 | emit_move_insn (subreg, op0); | |
1535 | op0 = reg; | |
1536 | bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT; | |
1537 | } | |
1538 | else | |
1539 | { | |
e98fc6de RS |
1540 | HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0)); |
1541 | rtx mem = assign_stack_temp (GET_MODE (op0), size); | |
7d293b58 | 1542 | emit_move_insn (mem, op0); |
e98fc6de | 1543 | op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size); |
7d293b58 | 1544 | } |
d006aa54 RH |
1545 | } |
1546 | } | |
1547 | ||
6ca6193b JDA |
1548 | /* ??? We currently assume TARGET is at least as big as BITSIZE. |
1549 | If that's wrong, the solution is to test for it and set TARGET to 0 | |
1550 | if needed. */ | |
e98f90d3 | 1551 | |
f5d4f18c SL |
1552 | /* Get the mode of the field to use for atomic access or subreg |
1553 | conversion. */ | |
b8ab7fc8 RS |
1554 | mode1 = mode; |
1555 | if (SCALAR_INT_MODE_P (tmode)) | |
44037a66 | 1556 | { |
ef4bddc2 | 1557 | machine_mode try_mode = mode_for_size (bitsize, |
b8ab7fc8 RS |
1558 | GET_MODE_CLASS (tmode), 0); |
1559 | if (try_mode != BLKmode) | |
1560 | mode1 = try_mode; | |
1561 | } | |
1562 | gcc_assert (mode1 != BLKmode); | |
1563 | ||
1564 | /* Extraction of a full MODE1 value can be done with a subreg as long | |
1565 | as the least significant bit of the value is the least significant | |
1566 | bit of either OP0 or a word of OP0. */ | |
1567 | if (!MEM_P (op0) | |
1568 | && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) | |
1569 | && bitsize == GET_MODE_BITSIZE (mode1) | |
1570 | && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))) | |
1571 | { | |
1572 | rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), | |
1573 | bitnum / BITS_PER_UNIT); | |
1574 | if (sub) | |
1575 | return convert_extracted_bit_field (sub, mode, tmode, unsignedp); | |
1576 | } | |
1577 | ||
1578 | /* Extraction of a full MODE1 value can be done with a load as long as | |
1579 | the field is on a byte boundary and is sufficiently aligned. */ | |
00efe3ea | 1580 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1)) |
b8ab7fc8 RS |
1581 | { |
1582 | op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT); | |
1583 | return convert_extracted_bit_field (op0, mode, tmode, unsignedp); | |
44037a66 | 1584 | } |
b8ab7fc8 | 1585 | |
44037a66 | 1586 | /* Handle fields bigger than a word. */ |
c410d49e | 1587 | |
44037a66 TG |
1588 | if (bitsize > BITS_PER_WORD) |
1589 | { | |
1590 | /* Here we transfer the words of the field | |
1591 | in the order least significant first. | |
1592 | This is because the most significant word is the one which may | |
1593 | be less than full. */ | |
1594 | ||
0cd9e9ee | 1595 | unsigned int backwards = WORDS_BIG_ENDIAN; |
770ae6cc RK |
1596 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
1597 | unsigned int i; | |
f3f6fb16 | 1598 | rtx_insn *last; |
44037a66 | 1599 | |
02972eaf | 1600 | if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) |
44037a66 TG |
1601 | target = gen_reg_rtx (mode); |
1602 | ||
7d21a61e DD |
1603 | /* In case we're about to clobber a base register or something |
1604 | (see gcc.c-torture/execute/20040625-1.c). */ | |
1605 | if (reg_mentioned_p (target, str_rtx)) | |
1606 | target = gen_reg_rtx (mode); | |
1607 | ||
34ea783b | 1608 | /* Indicate for flow that the entire target reg is being set. */ |
c41c1387 | 1609 | emit_clobber (target); |
34ea783b | 1610 | |
5ef0b50d | 1611 | last = get_last_insn (); |
44037a66 TG |
1612 | for (i = 0; i < nwords; i++) |
1613 | { | |
1614 | /* If I is 0, use the low-order word in both field and target; | |
1615 | if I is 1, use the next to lowest word; and so on. */ | |
77295dec | 1616 | /* Word number in TARGET to use. */ |
770ae6cc | 1617 | unsigned int wordnum |
0cd9e9ee | 1618 | = (backwards |
770ae6cc RK |
1619 | ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 |
1620 | : i); | |
77295dec | 1621 | /* Offset from start of field in OP0. */ |
0cd9e9ee EB |
1622 | unsigned int bit_offset = (backwards |
1623 | ? MAX ((int) bitsize - ((int) i + 1) | |
1624 | * BITS_PER_WORD, | |
1625 | 0) | |
770ae6cc | 1626 | : (int) i * BITS_PER_WORD); |
44037a66 TG |
1627 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
1628 | rtx result_part | |
5ef0b50d EB |
1629 | = extract_bit_field_1 (op0, MIN (BITS_PER_WORD, |
1630 | bitsize - i * BITS_PER_WORD), | |
c6285bd7 | 1631 | bitnum + bit_offset, 1, target_part, |
5ef0b50d | 1632 | mode, word_mode, fallback_p); |
44037a66 | 1633 | |
5b0264cb | 1634 | gcc_assert (target_part); |
5ef0b50d EB |
1635 | if (!result_part) |
1636 | { | |
1637 | delete_insns_since (last); | |
1638 | return NULL; | |
1639 | } | |
44037a66 TG |
1640 | |
1641 | if (result_part != target_part) | |
1642 | emit_move_insn (target_part, result_part); | |
1643 | } | |
1644 | ||
5f57dff0 | 1645 | if (unsignedp) |
77295dec DE |
1646 | { |
1647 | /* Unless we've filled TARGET, the upper regs in a multi-reg value | |
1648 | need to be zero'd out. */ | |
1649 | if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) | |
1650 | { | |
770ae6cc | 1651 | unsigned int i, total_words; |
77295dec DE |
1652 | |
1653 | total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; | |
1654 | for (i = nwords; i < total_words; i++) | |
04050c69 RK |
1655 | emit_move_insn |
1656 | (operand_subword (target, | |
0cd9e9ee | 1657 | backwards ? total_words - i - 1 : i, |
04050c69 RK |
1658 | 1, VOIDmode), |
1659 | const0_rtx); | |
77295dec DE |
1660 | } |
1661 | return target; | |
1662 | } | |
1663 | ||
5f57dff0 JW |
1664 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
1665 | target = expand_shift (LSHIFT_EXPR, mode, target, | |
eb6c3df1 | 1666 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
5f57dff0 | 1667 | return expand_shift (RSHIFT_EXPR, mode, target, |
eb6c3df1 | 1668 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
44037a66 | 1669 | } |
c410d49e | 1670 | |
b8ab7fc8 RS |
1671 | /* If OP0 is a multi-word register, narrow it to the affected word. |
1672 | If the region spans two words, defer to extract_split_bit_field. */ | |
1673 | if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
44037a66 | 1674 | { |
b8ab7fc8 RS |
1675 | op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), |
1676 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); | |
1677 | bitnum %= BITS_PER_WORD; | |
1678 | if (bitnum + bitsize > BITS_PER_WORD) | |
470032d7 | 1679 | { |
b8ab7fc8 RS |
1680 | if (!fallback_p) |
1681 | return NULL_RTX; | |
1682 | target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp); | |
1683 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
470032d7 | 1684 | } |
44037a66 | 1685 | } |
44037a66 | 1686 | |
b8ab7fc8 RS |
1687 | /* From here on we know the desired field is smaller than a word. |
1688 | If OP0 is a register, it too fits within a word. */ | |
fcdd52b7 RS |
1689 | enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; |
1690 | extraction_insn extv; | |
1691 | if (!MEM_P (op0) | |
c0a8a3e6 RS |
1692 | /* ??? We could limit the structure size to the part of OP0 that |
1693 | contains the field, with appropriate checks for endianness | |
1694 | and TRULY_NOOP_TRUNCATION. */ | |
1695 | && get_best_reg_extraction_insn (&extv, pattern, | |
1696 | GET_MODE_BITSIZE (GET_MODE (op0)), | |
fcdd52b7 | 1697 | tmode)) |
44037a66 | 1698 | { |
fcdd52b7 | 1699 | rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum, |
a20556e4 | 1700 | unsignedp, target, mode, |
fcdd52b7 | 1701 | tmode); |
a20556e4 RS |
1702 | if (result) |
1703 | return result; | |
44037a66 | 1704 | } |
f76b9db2 | 1705 | |
6d7db3c5 RS |
1706 | /* If OP0 is a memory, try copying it to a register and seeing if a |
1707 | cheap register alternative is available. */ | |
fcdd52b7 | 1708 | if (MEM_P (op0)) |
6d7db3c5 | 1709 | { |
f5d4f18c SL |
1710 | if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, |
1711 | tmode)) | |
17a73ba0 | 1712 | { |
fcdd52b7 RS |
1713 | rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, |
1714 | bitnum, unsignedp, | |
1715 | target, mode, | |
1716 | tmode); | |
17a73ba0 RS |
1717 | if (result) |
1718 | return result; | |
1719 | } | |
1720 | ||
f3f6fb16 | 1721 | rtx_insn *last = get_last_insn (); |
f76b9db2 | 1722 | |
fcdd52b7 RS |
1723 | /* Try loading part of OP0 into a register and extracting the |
1724 | bitfield from that. */ | |
1725 | unsigned HOST_WIDE_INT bitpos; | |
1726 | rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, | |
1727 | 0, 0, tmode, &bitpos); | |
1728 | if (xop0) | |
6d7db3c5 | 1729 | { |
fcdd52b7 RS |
1730 | xop0 = copy_to_reg (xop0); |
1731 | rtx result = extract_bit_field_1 (xop0, bitsize, bitpos, | |
c6285bd7 | 1732 | unsignedp, target, |
6d7db3c5 | 1733 | mode, tmode, false); |
fcdd52b7 RS |
1734 | if (result) |
1735 | return result; | |
1736 | delete_insns_since (last); | |
c410d49e | 1737 | } |
44037a66 | 1738 | } |
562fc702 | 1739 | |
6d7db3c5 RS |
1740 | if (!fallback_p) |
1741 | return NULL; | |
1742 | ||
b8ab7fc8 RS |
1743 | /* Find a correspondingly-sized integer field, so we can apply |
1744 | shifts and masks to it. */ | |
1745 | int_mode = int_mode_for_mode (tmode); | |
1746 | if (int_mode == BLKmode) | |
1747 | int_mode = int_mode_for_mode (mode); | |
1748 | /* Should probably push op0 out to memory and then do a load. */ | |
1749 | gcc_assert (int_mode != BLKmode); | |
1750 | ||
1751 | target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, | |
c6285bd7 | 1752 | target, unsignedp); |
6d7db3c5 RS |
1753 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); |
1754 | } | |
1755 | ||
1756 | /* Generate code to extract a byte-field from STR_RTX | |
1757 | containing BITSIZE bits, starting at BITNUM, | |
1758 | and put it in TARGET if possible (if TARGET is nonzero). | |
1759 | Regardless of TARGET, we return the rtx for where the value is placed. | |
1760 | ||
1761 | STR_RTX is the structure containing the byte (a REG or MEM). | |
1762 | UNSIGNEDP is nonzero if this is an unsigned bit field. | |
1763 | MODE is the natural mode of the field value once extracted. | |
1764 | TMODE is the mode the caller would like the value to have; | |
1765 | but the value may be returned with type MODE instead. | |
1766 | ||
1767 | If a TARGET is specified and we can store in it at no extra cost, | |
1768 | we do so, and return TARGET. | |
1769 | Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred | |
1770 | if they are equally easy. */ | |
1771 | ||
1772 | rtx | |
1773 | extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
c6285bd7 | 1774 | unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, |
ef4bddc2 | 1775 | machine_mode mode, machine_mode tmode) |
6d7db3c5 | 1776 | { |
ef4bddc2 | 1777 | machine_mode mode1; |
f5d4f18c SL |
1778 | |
1779 | /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ | |
1780 | if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0) | |
1781 | mode1 = GET_MODE (str_rtx); | |
1782 | else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0) | |
1783 | mode1 = GET_MODE (target); | |
1784 | else | |
1785 | mode1 = tmode; | |
1786 | ||
6f4e9cf8 | 1787 | if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0)) |
f5d4f18c | 1788 | { |
b6dd42a9 BE |
1789 | /* Extraction of a full MODE1 value can be done with a simple load. |
1790 | We know here that the field can be accessed with one single | |
1791 | instruction. For targets that support unaligned memory, | |
1792 | an unaligned access may be necessary. */ | |
1793 | if (bitsize == GET_MODE_BITSIZE (mode1)) | |
6f4e9cf8 | 1794 | { |
53c615a2 BE |
1795 | rtx result = adjust_bitfield_address (str_rtx, mode1, |
1796 | bitnum / BITS_PER_UNIT); | |
b6dd42a9 | 1797 | gcc_assert (bitnum % BITS_PER_UNIT == 0); |
53c615a2 | 1798 | return convert_extracted_bit_field (result, mode, tmode, unsignedp); |
6f4e9cf8 BE |
1799 | } |
1800 | ||
53c615a2 BE |
1801 | str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum, |
1802 | &bitnum); | |
b6dd42a9 | 1803 | gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1)); |
53c615a2 | 1804 | str_rtx = copy_to_reg (str_rtx); |
f5d4f18c | 1805 | } |
53c615a2 | 1806 | |
c6285bd7 | 1807 | return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, |
6d7db3c5 | 1808 | target, mode, tmode, true); |
44037a66 TG |
1809 | } |
1810 | \f | |
b8ab7fc8 RS |
1811 | /* Use shifts and boolean operations to extract a field of BITSIZE bits |
1812 | from bit BITNUM of OP0. | |
44037a66 TG |
1813 | |
1814 | UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). | |
1815 | If TARGET is nonzero, attempts to store the value there | |
1816 | and return TARGET, but this is not guaranteed. | |
04050c69 | 1817 | If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ |
44037a66 TG |
1818 | |
1819 | static rtx | |
ef4bddc2 | 1820 | extract_fixed_bit_field (machine_mode tmode, rtx op0, |
502b8322 | 1821 | unsigned HOST_WIDE_INT bitsize, |
b8ab7fc8 | 1822 | unsigned HOST_WIDE_INT bitnum, rtx target, |
c6285bd7 | 1823 | int unsignedp) |
44037a66 | 1824 | { |
b8ab7fc8 | 1825 | if (MEM_P (op0)) |
44037a66 | 1826 | { |
ef4bddc2 | 1827 | machine_mode mode |
548cfdc2 EB |
1828 | = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode, |
1829 | MEM_VOLATILE_P (op0)); | |
44037a66 TG |
1830 | |
1831 | if (mode == VOIDmode) | |
1832 | /* The only way this should occur is if the field spans word | |
1833 | boundaries. */ | |
b8ab7fc8 | 1834 | return extract_split_bit_field (op0, bitsize, bitnum, unsignedp); |
44037a66 | 1835 | |
f5d4f18c | 1836 | op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); |
44037a66 TG |
1837 | } |
1838 | ||
6f4e9cf8 BE |
1839 | return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum, |
1840 | target, unsignedp); | |
1841 | } | |
1842 | ||
1843 | /* Helper function for extract_fixed_bit_field, extracts | |
1844 | the bit field always using the MODE of OP0. */ | |
1845 | ||
1846 | static rtx | |
ef4bddc2 | 1847 | extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, |
6f4e9cf8 BE |
1848 | unsigned HOST_WIDE_INT bitsize, |
1849 | unsigned HOST_WIDE_INT bitnum, rtx target, | |
1850 | int unsignedp) | |
1851 | { | |
ef4bddc2 | 1852 | machine_mode mode = GET_MODE (op0); |
b8ab7fc8 RS |
1853 | gcc_assert (SCALAR_INT_MODE_P (mode)); |
1854 | ||
1855 | /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) | |
1856 | for invalid input, such as extract equivalent of f5 from | |
1857 | gcc.dg/pr48335-2.c. */ | |
37811a73 | 1858 | |
f76b9db2 | 1859 | if (BYTES_BIG_ENDIAN) |
b8ab7fc8 | 1860 | /* BITNUM is the distance between our msb and that of OP0. |
04050c69 | 1861 | Convert it to the distance from the lsb. */ |
b8ab7fc8 | 1862 | bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; |
44037a66 | 1863 | |
b8ab7fc8 | 1864 | /* Now BITNUM is always the distance between the field's lsb and that of OP0. |
44037a66 TG |
1865 | We have reduced the big-endian case to the little-endian case. */ |
1866 | ||
1867 | if (unsignedp) | |
1868 | { | |
b8ab7fc8 | 1869 | if (bitnum) |
44037a66 TG |
1870 | { |
1871 | /* If the field does not already start at the lsb, | |
1872 | shift it so it does. */ | |
44037a66 | 1873 | /* Maybe propagate the target for the shift. */ |
f8cfc6aa | 1874 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
6399c0ab SB |
1875 | if (tmode != mode) |
1876 | subtarget = 0; | |
b8ab7fc8 | 1877 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1); |
44037a66 TG |
1878 | } |
1879 | /* Convert the value to the desired mode. */ | |
1880 | if (mode != tmode) | |
1881 | op0 = convert_to_mode (tmode, op0, 1); | |
1882 | ||
1883 | /* Unless the msb of the field used to be the msb when we shifted, | |
1884 | mask out the upper bits. */ | |
1885 | ||
b8ab7fc8 | 1886 | if (GET_MODE_BITSIZE (mode) != bitnum + bitsize) |
44037a66 TG |
1887 | return expand_binop (GET_MODE (op0), and_optab, op0, |
1888 | mask_rtx (GET_MODE (op0), 0, bitsize, 0), | |
1889 | target, 1, OPTAB_LIB_WIDEN); | |
1890 | return op0; | |
1891 | } | |
1892 | ||
1893 | /* To extract a signed bit-field, first shift its msb to the msb of the word, | |
1894 | then arithmetic-shift its lsb to the lsb of the word. */ | |
1895 | op0 = force_reg (mode, op0); | |
44037a66 TG |
1896 | |
1897 | /* Find the narrowest integer mode that contains the field. */ | |
1898 | ||
1899 | for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; | |
1900 | mode = GET_MODE_WIDER_MODE (mode)) | |
b8ab7fc8 | 1901 | if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum) |
44037a66 TG |
1902 | { |
1903 | op0 = convert_to_mode (mode, op0, 0); | |
1904 | break; | |
1905 | } | |
1906 | ||
ccb1b17b JJ |
1907 | if (mode != tmode) |
1908 | target = 0; | |
1909 | ||
b8ab7fc8 | 1910 | if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum)) |
44037a66 | 1911 | { |
b8ab7fc8 | 1912 | int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum); |
44037a66 | 1913 | /* Maybe propagate the target for the shift. */ |
f8cfc6aa | 1914 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
44037a66 TG |
1915 | op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); |
1916 | } | |
1917 | ||
1918 | return expand_shift (RSHIFT_EXPR, mode, op0, | |
eb6c3df1 | 1919 | GET_MODE_BITSIZE (mode) - bitsize, target, 0); |
44037a66 | 1920 | } |
44037a66 TG |
1921 | |
1922 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value | |
088c5368 | 1923 | VALUE << BITPOS. */ |
44037a66 TG |
1924 | |
1925 | static rtx | |
ef4bddc2 | 1926 | lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, |
088c5368 | 1927 | int bitpos) |
44037a66 | 1928 | { |
807e902e | 1929 | return immed_wide_int_const (wi::lshift (value, bitpos), mode); |
44037a66 TG |
1930 | } |
1931 | \f | |
1932 | /* Extract a bit field that is split across two words | |
1933 | and return an RTX for the result. | |
1934 | ||
1935 | OP0 is the REG, SUBREG or MEM rtx for the first of the two words. | |
1936 | BITSIZE is the field width; BITPOS, position of its first bit, in the word. | |
04050c69 | 1937 | UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */ |
44037a66 TG |
1938 | |
1939 | static rtx | |
502b8322 AJ |
1940 | extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
1941 | unsigned HOST_WIDE_INT bitpos, int unsignedp) | |
44037a66 | 1942 | { |
770ae6cc RK |
1943 | unsigned int unit; |
1944 | unsigned int bitsdone = 0; | |
c16ddde3 | 1945 | rtx result = NULL_RTX; |
06c94bce | 1946 | int first = 1; |
44037a66 | 1947 | |
4ee16841 DE |
1948 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
1949 | much at a time. */ | |
f8cfc6aa | 1950 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
4ee16841 DE |
1951 | unit = BITS_PER_WORD; |
1952 | else | |
609023ff | 1953 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
4ee16841 | 1954 | |
06c94bce RS |
1955 | while (bitsdone < bitsize) |
1956 | { | |
770ae6cc | 1957 | unsigned HOST_WIDE_INT thissize; |
06c94bce | 1958 | rtx part, word; |
770ae6cc RK |
1959 | unsigned HOST_WIDE_INT thispos; |
1960 | unsigned HOST_WIDE_INT offset; | |
06c94bce RS |
1961 | |
1962 | offset = (bitpos + bitsdone) / unit; | |
1963 | thispos = (bitpos + bitsdone) % unit; | |
1964 | ||
0eb61c19 DE |
1965 | /* THISSIZE must not overrun a word boundary. Otherwise, |
1966 | extract_fixed_bit_field will call us again, and we will mutually | |
1967 | recurse forever. */ | |
1968 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); | |
1969 | thissize = MIN (thissize, unit - thispos); | |
06c94bce RS |
1970 | |
1971 | /* If OP0 is a register, then handle OFFSET here. | |
5f57dff0 JW |
1972 | |
1973 | When handling multiword bitfields, extract_bit_field may pass | |
1974 | down a word_mode SUBREG of a larger REG for a bitfield that actually | |
1975 | crosses a word boundary. Thus, for a SUBREG, we must find | |
1976 | the current word starting from the base register. */ | |
1977 | if (GET_CODE (op0) == SUBREG) | |
1978 | { | |
ddef6bc7 JJ |
1979 | int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; |
1980 | word = operand_subword_force (SUBREG_REG (op0), word_offset, | |
5f57dff0 JW |
1981 | GET_MODE (SUBREG_REG (op0))); |
1982 | offset = 0; | |
1983 | } | |
f8cfc6aa | 1984 | else if (REG_P (op0)) |
06c94bce RS |
1985 | { |
1986 | word = operand_subword_force (op0, offset, GET_MODE (op0)); | |
1987 | offset = 0; | |
1988 | } | |
1989 | else | |
1990 | word = op0; | |
1991 | ||
06c94bce | 1992 | /* Extract the parts in bit-counting order, |
0eb61c19 | 1993 | whose meaning is determined by BYTES_PER_UNIT. |
b8ab7fc8 RS |
1994 | OFFSET is in UNITs, and UNIT is in bits. */ |
1995 | part = extract_fixed_bit_field (word_mode, word, thissize, | |
c6285bd7 | 1996 | offset * unit + thispos, 0, 1); |
06c94bce | 1997 | bitsdone += thissize; |
44037a66 | 1998 | |
06c94bce | 1999 | /* Shift this part into place for the result. */ |
f76b9db2 ILT |
2000 | if (BYTES_BIG_ENDIAN) |
2001 | { | |
2002 | if (bitsize != bitsdone) | |
2003 | part = expand_shift (LSHIFT_EXPR, word_mode, part, | |
eb6c3df1 | 2004 | bitsize - bitsdone, 0, 1); |
f76b9db2 ILT |
2005 | } |
2006 | else | |
2007 | { | |
2008 | if (bitsdone != thissize) | |
2009 | part = expand_shift (LSHIFT_EXPR, word_mode, part, | |
eb6c3df1 | 2010 | bitsdone - thissize, 0, 1); |
f76b9db2 | 2011 | } |
44037a66 | 2012 | |
06c94bce RS |
2013 | if (first) |
2014 | result = part; | |
2015 | else | |
2016 | /* Combine the parts with bitwise or. This works | |
2017 | because we extracted each part as an unsigned bit field. */ | |
2018 | result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, | |
2019 | OPTAB_LIB_WIDEN); | |
2020 | ||
2021 | first = 0; | |
2022 | } | |
44037a66 TG |
2023 | |
2024 | /* Unsigned bit field: we are done. */ | |
2025 | if (unsignedp) | |
2026 | return result; | |
2027 | /* Signed bit field: sign-extend with two arithmetic shifts. */ | |
2028 | result = expand_shift (LSHIFT_EXPR, word_mode, result, | |
eb6c3df1 | 2029 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |
44037a66 | 2030 | return expand_shift (RSHIFT_EXPR, word_mode, result, |
eb6c3df1 | 2031 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |
44037a66 TG |
2032 | } |
2033 | \f | |
18b526e8 RS |
2034 | /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving |
2035 | the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than | |
2036 | MODE, fill the upper bits with zeros. Fail if the layout of either | |
2037 | mode is unknown (as for CC modes) or if the extraction would involve | |
2038 | unprofitable mode punning. Return the value on success, otherwise | |
2039 | return null. | |
2040 | ||
2041 | This is different from gen_lowpart* in these respects: | |
2042 | ||
2043 | - the returned value must always be considered an rvalue | |
2044 | ||
2045 | - when MODE is wider than SRC_MODE, the extraction involves | |
2046 | a zero extension | |
2047 | ||
2048 | - when MODE is smaller than SRC_MODE, the extraction involves | |
2049 | a truncation (and is thus subject to TRULY_NOOP_TRUNCATION). | |
2050 | ||
2051 | In other words, this routine performs a computation, whereas the | |
2052 | gen_lowpart* routines are conceptually lvalue or rvalue subreg | |
2053 | operations. */ | |
2054 | ||
2055 | rtx | |
ef4bddc2 | 2056 | extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src) |
18b526e8 | 2057 | { |
ef4bddc2 | 2058 | machine_mode int_mode, src_int_mode; |
18b526e8 RS |
2059 | |
2060 | if (mode == src_mode) | |
2061 | return src; | |
2062 | ||
2063 | if (CONSTANT_P (src)) | |
d898d29b JJ |
2064 | { |
2065 | /* simplify_gen_subreg can't be used here, as if simplify_subreg | |
2066 | fails, it will happily create (subreg (symbol_ref)) or similar | |
2067 | invalid SUBREGs. */ | |
2068 | unsigned int byte = subreg_lowpart_offset (mode, src_mode); | |
2069 | rtx ret = simplify_subreg (mode, src, src_mode, byte); | |
2070 | if (ret) | |
2071 | return ret; | |
2072 | ||
2073 | if (GET_MODE (src) == VOIDmode | |
2074 | || !validate_subreg (mode, src_mode, src, byte)) | |
2075 | return NULL_RTX; | |
2076 | ||
2077 | src = force_reg (GET_MODE (src), src); | |
2078 | return gen_rtx_SUBREG (mode, src, byte); | |
2079 | } | |
18b526e8 RS |
2080 | |
2081 | if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) | |
2082 | return NULL_RTX; | |
2083 | ||
2084 | if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) | |
2085 | && MODES_TIEABLE_P (mode, src_mode)) | |
2086 | { | |
2087 | rtx x = gen_lowpart_common (mode, src); | |
2088 | if (x) | |
2089 | return x; | |
2090 | } | |
2091 | ||
2092 | src_int_mode = int_mode_for_mode (src_mode); | |
2093 | int_mode = int_mode_for_mode (mode); | |
2094 | if (src_int_mode == BLKmode || int_mode == BLKmode) | |
2095 | return NULL_RTX; | |
2096 | ||
2097 | if (!MODES_TIEABLE_P (src_int_mode, src_mode)) | |
2098 | return NULL_RTX; | |
2099 | if (!MODES_TIEABLE_P (int_mode, mode)) | |
2100 | return NULL_RTX; | |
2101 | ||
2102 | src = gen_lowpart (src_int_mode, src); | |
2103 | src = convert_modes (int_mode, src_int_mode, src, true); | |
2104 | src = gen_lowpart (mode, src); | |
2105 | return src; | |
2106 | } | |
2107 | \f | |
44037a66 TG |
2108 | /* Add INC into TARGET. */ |
2109 | ||
2110 | void | |
502b8322 | 2111 | expand_inc (rtx target, rtx inc) |
44037a66 TG |
2112 | { |
2113 | rtx value = expand_binop (GET_MODE (target), add_optab, | |
2114 | target, inc, | |
2115 | target, 0, OPTAB_LIB_WIDEN); | |
2116 | if (value != target) | |
2117 | emit_move_insn (target, value); | |
2118 | } | |
2119 | ||
2120 | /* Subtract DEC from TARGET. */ | |
2121 | ||
2122 | void | |
502b8322 | 2123 | expand_dec (rtx target, rtx dec) |
44037a66 TG |
2124 | { |
2125 | rtx value = expand_binop (GET_MODE (target), sub_optab, | |
2126 | target, dec, | |
2127 | target, 0, OPTAB_LIB_WIDEN); | |
2128 | if (value != target) | |
2129 | emit_move_insn (target, value); | |
2130 | } | |
2131 | \f | |
2132 | /* Output a shift instruction for expression code CODE, | |
2133 | with SHIFTED being the rtx for the value to shift, | |
86529a49 | 2134 | and AMOUNT the rtx for the amount to shift by. |
44037a66 TG |
2135 | Store the result in the rtx TARGET, if that is convenient. |
2136 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2137 | Return the rtx for where the value is. */ | |
2138 | ||
86529a49 | 2139 | static rtx |
ef4bddc2 | 2140 | expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, |
86529a49 | 2141 | rtx amount, rtx target, int unsignedp) |
44037a66 | 2142 | { |
b3694847 SS |
2143 | rtx op1, temp = 0; |
2144 | int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); | |
2145 | int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); | |
71d46ca5 MM |
2146 | optab lshift_optab = ashl_optab; |
2147 | optab rshift_arith_optab = ashr_optab; | |
2148 | optab rshift_uns_optab = lshr_optab; | |
2149 | optab lrotate_optab = rotl_optab; | |
2150 | optab rrotate_optab = rotr_optab; | |
ef4bddc2 RS |
2151 | machine_mode op1_mode; |
2152 | machine_mode scalar_mode = mode; | |
d858f359 | 2153 | int attempt; |
f40751dd | 2154 | bool speed = optimize_insn_for_speed_p (); |
44037a66 | 2155 | |
a4ee446d JJ |
2156 | if (VECTOR_MODE_P (mode)) |
2157 | scalar_mode = GET_MODE_INNER (mode); | |
86529a49 | 2158 | op1 = amount; |
71d46ca5 MM |
2159 | op1_mode = GET_MODE (op1); |
2160 | ||
2161 | /* Determine whether the shift/rotate amount is a vector, or scalar. If the | |
2162 | shift amount is a vector, use the vector/vector shift patterns. */ | |
2163 | if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) | |
2164 | { | |
2165 | lshift_optab = vashl_optab; | |
2166 | rshift_arith_optab = vashr_optab; | |
2167 | rshift_uns_optab = vlshr_optab; | |
2168 | lrotate_optab = vrotl_optab; | |
2169 | rrotate_optab = vrotr_optab; | |
2170 | } | |
2171 | ||
44037a66 TG |
2172 | /* Previously detected shift-counts computed by NEGATE_EXPR |
2173 | and shifted in the other direction; but that does not work | |
2174 | on all machines. */ | |
2175 | ||
166cdf4a RH |
2176 | if (SHIFT_COUNT_TRUNCATED) |
2177 | { | |
481683e1 | 2178 | if (CONST_INT_P (op1) |
0fb7aeda | 2179 | && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= |
a4ee446d | 2180 | (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode))) |
0fb7aeda | 2181 | op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) |
a4ee446d | 2182 | % GET_MODE_BITSIZE (scalar_mode)); |
166cdf4a | 2183 | else if (GET_CODE (op1) == SUBREG |
c1cb09ad | 2184 | && subreg_lowpart_p (op1) |
7afe2801 DM |
2185 | && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1))) |
2186 | && SCALAR_INT_MODE_P (GET_MODE (op1))) | |
166cdf4a RH |
2187 | op1 = SUBREG_REG (op1); |
2188 | } | |
2ab0a5c4 | 2189 | |
75776c6d JJ |
2190 | /* Canonicalize rotates by constant amount. If op1 is bitsize / 2, |
2191 | prefer left rotation, if op1 is from bitsize / 2 + 1 to | |
2192 | bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1 | |
2193 | amount instead. */ | |
2194 | if (rotate | |
2195 | && CONST_INT_P (op1) | |
a4ee446d JJ |
2196 | && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left, |
2197 | GET_MODE_BITSIZE (scalar_mode) - 1)) | |
75776c6d | 2198 | { |
a4ee446d | 2199 | op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1)); |
75776c6d JJ |
2200 | left = !left; |
2201 | code = left ? LROTATE_EXPR : RROTATE_EXPR; | |
2202 | } | |
2203 | ||
a25efea0 TP |
2204 | /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi. |
2205 | Note that this is not the case for bigger values. For instance a rotation | |
2206 | of 0x01020304 by 16 bits gives 0x03040102 which is different from | |
2207 | 0x04030201 (bswapsi). */ | |
2208 | if (rotate | |
2209 | && CONST_INT_P (op1) | |
2210 | && INTVAL (op1) == BITS_PER_UNIT | |
2211 | && GET_MODE_SIZE (scalar_mode) == 2 | |
2212 | && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing) | |
2213 | return expand_unop (HImode, bswap_optab, shifted, NULL_RTX, | |
2214 | unsignedp); | |
2215 | ||
44037a66 TG |
2216 | if (op1 == const0_rtx) |
2217 | return shifted; | |
2218 | ||
15bad393 RS |
2219 | /* Check whether its cheaper to implement a left shift by a constant |
2220 | bit count by a sequence of additions. */ | |
2221 | if (code == LSHIFT_EXPR | |
481683e1 | 2222 | && CONST_INT_P (op1) |
15bad393 | 2223 | && INTVAL (op1) > 0 |
a4ee446d | 2224 | && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode) |
cb2eb96f | 2225 | && INTVAL (op1) < MAX_BITS_PER_WORD |
5322d07e NF |
2226 | && (shift_cost (speed, mode, INTVAL (op1)) |
2227 | > INTVAL (op1) * add_cost (speed, mode)) | |
2228 | && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) | |
15bad393 RS |
2229 | { |
2230 | int i; | |
2231 | for (i = 0; i < INTVAL (op1); i++) | |
2232 | { | |
2233 | temp = force_reg (mode, shifted); | |
2234 | shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, | |
2235 | unsignedp, OPTAB_LIB_WIDEN); | |
2236 | } | |
2237 | return shifted; | |
2238 | } | |
2239 | ||
d858f359 | 2240 | for (attempt = 0; temp == 0 && attempt < 3; attempt++) |
44037a66 TG |
2241 | { |
2242 | enum optab_methods methods; | |
2243 | ||
d858f359 | 2244 | if (attempt == 0) |
44037a66 | 2245 | methods = OPTAB_DIRECT; |
d858f359 | 2246 | else if (attempt == 1) |
44037a66 TG |
2247 | methods = OPTAB_WIDEN; |
2248 | else | |
2249 | methods = OPTAB_LIB_WIDEN; | |
2250 | ||
2251 | if (rotate) | |
2252 | { | |
2253 | /* Widening does not work for rotation. */ | |
2254 | if (methods == OPTAB_WIDEN) | |
2255 | continue; | |
2256 | else if (methods == OPTAB_LIB_WIDEN) | |
cbec710e | 2257 | { |
39e71615 | 2258 | /* If we have been unable to open-code this by a rotation, |
cbec710e | 2259 | do it as the IOR of two shifts. I.e., to rotate A |
ae6fa899 JJ |
2260 | by N bits, compute |
2261 | (A << N) | ((unsigned) A >> ((-N) & (C - 1))) | |
cbec710e RK |
2262 | where C is the bitsize of A. |
2263 | ||
2264 | It is theoretically possible that the target machine might | |
2265 | not be able to perform either shift and hence we would | |
2266 | be making two libcalls rather than just the one for the | |
2267 | shift (similarly if IOR could not be done). We will allow | |
2268 | this extremely unlikely lossage to avoid complicating the | |
2269 | code below. */ | |
2270 | ||
39e71615 | 2271 | rtx subtarget = target == shifted ? 0 : target; |
86529a49 | 2272 | rtx new_amount, other_amount; |
39e71615 | 2273 | rtx temp1; |
86529a49 RG |
2274 | |
2275 | new_amount = op1; | |
ae6fa899 JJ |
2276 | if (op1 == const0_rtx) |
2277 | return shifted; | |
2278 | else if (CONST_INT_P (op1)) | |
a4ee446d | 2279 | other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode) |
5c049507 RG |
2280 | - INTVAL (op1)); |
2281 | else | |
ae6fa899 JJ |
2282 | { |
2283 | other_amount | |
2284 | = simplify_gen_unary (NEG, GET_MODE (op1), | |
2285 | op1, GET_MODE (op1)); | |
a4ee446d | 2286 | HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1; |
ae6fa899 | 2287 | other_amount |
69a59f0f RS |
2288 | = simplify_gen_binary (AND, GET_MODE (op1), other_amount, |
2289 | gen_int_mode (mask, GET_MODE (op1))); | |
ae6fa899 | 2290 | } |
39e71615 RK |
2291 | |
2292 | shifted = force_reg (mode, shifted); | |
2293 | ||
86529a49 RG |
2294 | temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR, |
2295 | mode, shifted, new_amount, 0, 1); | |
2296 | temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR, | |
2297 | mode, shifted, other_amount, | |
2298 | subtarget, 1); | |
39e71615 RK |
2299 | return expand_binop (mode, ior_optab, temp, temp1, target, |
2300 | unsignedp, methods); | |
cbec710e | 2301 | } |
44037a66 TG |
2302 | |
2303 | temp = expand_binop (mode, | |
71d46ca5 | 2304 | left ? lrotate_optab : rrotate_optab, |
44037a66 TG |
2305 | shifted, op1, target, unsignedp, methods); |
2306 | } | |
2307 | else if (unsignedp) | |
a34958c9 | 2308 | temp = expand_binop (mode, |
71d46ca5 | 2309 | left ? lshift_optab : rshift_uns_optab, |
a34958c9 | 2310 | shifted, op1, target, unsignedp, methods); |
44037a66 TG |
2311 | |
2312 | /* Do arithmetic shifts. | |
2313 | Also, if we are going to widen the operand, we can just as well | |
2314 | use an arithmetic right-shift instead of a logical one. */ | |
2315 | if (temp == 0 && ! rotate | |
2316 | && (! unsignedp || (! left && methods == OPTAB_WIDEN))) | |
2317 | { | |
2318 | enum optab_methods methods1 = methods; | |
2319 | ||
2320 | /* If trying to widen a log shift to an arithmetic shift, | |
2321 | don't accept an arithmetic shift of the same size. */ | |
2322 | if (unsignedp) | |
2323 | methods1 = OPTAB_MUST_WIDEN; | |
2324 | ||
2325 | /* Arithmetic shift */ | |
2326 | ||
2327 | temp = expand_binop (mode, | |
71d46ca5 | 2328 | left ? lshift_optab : rshift_arith_optab, |
44037a66 TG |
2329 | shifted, op1, target, unsignedp, methods1); |
2330 | } | |
2331 | ||
711a5e64 | 2332 | /* We used to try extzv here for logical right shifts, but that was |
c410d49e | 2333 | only useful for one machine, the VAX, and caused poor code |
711a5e64 RK |
2334 | generation there for lshrdi3, so the code was deleted and a |
2335 | define_expand for lshrsi3 was added to vax.md. */ | |
44037a66 TG |
2336 | } |
2337 | ||
5b0264cb | 2338 | gcc_assert (temp); |
44037a66 TG |
2339 | return temp; |
2340 | } | |
eb6c3df1 RG |
2341 | |
2342 | /* Output a shift instruction for expression code CODE, | |
2343 | with SHIFTED being the rtx for the value to shift, | |
2344 | and AMOUNT the amount to shift by. | |
2345 | Store the result in the rtx TARGET, if that is convenient. | |
2346 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2347 | Return the rtx for where the value is. */ | |
2348 | ||
2349 | rtx | |
ef4bddc2 | 2350 | expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |
eb6c3df1 RG |
2351 | int amount, rtx target, int unsignedp) |
2352 | { | |
86529a49 RG |
2353 | return expand_shift_1 (code, mode, |
2354 | shifted, GEN_INT (amount), target, unsignedp); | |
2355 | } | |
2356 | ||
2357 | /* Output a shift instruction for expression code CODE, | |
2358 | with SHIFTED being the rtx for the value to shift, | |
2359 | and AMOUNT the tree for the amount to shift by. | |
2360 | Store the result in the rtx TARGET, if that is convenient. | |
2361 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2362 | Return the rtx for where the value is. */ | |
2363 | ||
2364 | rtx | |
ef4bddc2 | 2365 | expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, |
86529a49 RG |
2366 | tree amount, rtx target, int unsignedp) |
2367 | { | |
2368 | return expand_shift_1 (code, mode, | |
2369 | shifted, expand_normal (amount), target, unsignedp); | |
eb6c3df1 | 2370 | } |
86529a49 | 2371 | |
44037a66 | 2372 | \f |
8efc8980 RS |
2373 | /* Indicates the type of fixup needed after a constant multiplication. |
2374 | BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that | |
2375 | the result should be negated, and ADD_VARIANT means that the | |
2376 | multiplicand should be added to the result. */ | |
2377 | enum mult_variant {basic_variant, negate_variant, add_variant}; | |
2378 | ||
41c64ac0 | 2379 | static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, |
ef4bddc2 RS |
2380 | const struct mult_cost *, machine_mode mode); |
2381 | static bool choose_mult_variant (machine_mode, HOST_WIDE_INT, | |
f258e38b | 2382 | struct algorithm *, enum mult_variant *, int); |
ef4bddc2 | 2383 | static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, |
8efc8980 | 2384 | const struct algorithm *, enum mult_variant); |
502b8322 | 2385 | static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); |
ef4bddc2 RS |
2386 | static rtx extract_high_half (machine_mode, rtx); |
2387 | static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int); | |
2388 | static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx, | |
8efc8980 | 2389 | int, int); |
44037a66 | 2390 | /* Compute and return the best algorithm for multiplying by T. |
7963ac37 RK |
2391 | The algorithm must cost less than cost_limit |
2392 | If retval.cost >= COST_LIMIT, no algorithm was found and all | |
41c64ac0 RS |
2393 | other field of the returned struct are undefined. |
2394 | MODE is the machine mode of the multiplication. */ | |
44037a66 | 2395 | |
819126a6 | 2396 | static void |
502b8322 | 2397 | synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |
ef4bddc2 | 2398 | const struct mult_cost *cost_limit, machine_mode mode) |
44037a66 | 2399 | { |
b2fb324c | 2400 | int m; |
52786026 | 2401 | struct algorithm *alg_in, *best_alg; |
26276705 RS |
2402 | struct mult_cost best_cost; |
2403 | struct mult_cost new_limit; | |
2404 | int op_cost, op_latency; | |
ef268d34 | 2405 | unsigned HOST_WIDE_INT orig_t = t; |
b2fb324c | 2406 | unsigned HOST_WIDE_INT q; |
84ddb681 | 2407 | int maxm, hash_index; |
7b13ee6b KH |
2408 | bool cache_hit = false; |
2409 | enum alg_code cache_alg = alg_zero; | |
f40751dd | 2410 | bool speed = optimize_insn_for_speed_p (); |
ef4bddc2 | 2411 | machine_mode imode; |
5322d07e | 2412 | struct alg_hash_entry *entry_ptr; |
44037a66 | 2413 | |
7963ac37 RK |
2414 | /* Indicate that no algorithm is yet found. If no algorithm |
2415 | is found, this value will be returned and indicate failure. */ | |
26276705 | 2416 | alg_out->cost.cost = cost_limit->cost + 1; |
3ab0f290 | 2417 | alg_out->cost.latency = cost_limit->latency + 1; |
44037a66 | 2418 | |
26276705 RS |
2419 | if (cost_limit->cost < 0 |
2420 | || (cost_limit->cost == 0 && cost_limit->latency <= 0)) | |
819126a6 | 2421 | return; |
44037a66 | 2422 | |
84ddb681 RH |
2423 | /* Be prepared for vector modes. */ |
2424 | imode = GET_MODE_INNER (mode); | |
2425 | if (imode == VOIDmode) | |
2426 | imode = mode; | |
2427 | ||
2428 | maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); | |
2429 | ||
0792ab19 | 2430 | /* Restrict the bits of "t" to the multiplication's mode. */ |
84ddb681 | 2431 | t &= GET_MODE_MASK (imode); |
0792ab19 | 2432 | |
b385aeda RK |
2433 | /* t == 1 can be done in zero cost. */ |
2434 | if (t == 1) | |
b2fb324c | 2435 | { |
819126a6 | 2436 | alg_out->ops = 1; |
26276705 RS |
2437 | alg_out->cost.cost = 0; |
2438 | alg_out->cost.latency = 0; | |
819126a6 RK |
2439 | alg_out->op[0] = alg_m; |
2440 | return; | |
b2fb324c RK |
2441 | } |
2442 | ||
b385aeda RK |
2443 | /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |
2444 | fail now. */ | |
819126a6 | 2445 | if (t == 0) |
b385aeda | 2446 | { |
5322d07e | 2447 | if (MULT_COST_LESS (cost_limit, zero_cost (speed))) |
819126a6 | 2448 | return; |
b385aeda RK |
2449 | else |
2450 | { | |
819126a6 | 2451 | alg_out->ops = 1; |
5322d07e NF |
2452 | alg_out->cost.cost = zero_cost (speed); |
2453 | alg_out->cost.latency = zero_cost (speed); | |
819126a6 RK |
2454 | alg_out->op[0] = alg_zero; |
2455 | return; | |
b385aeda RK |
2456 | } |
2457 | } | |
2458 | ||
52786026 RK |
2459 | /* We'll be needing a couple extra algorithm structures now. */ |
2460 | ||
1b4572a8 KG |
2461 | alg_in = XALLOCA (struct algorithm); |
2462 | best_alg = XALLOCA (struct algorithm); | |
26276705 | 2463 | best_cost = *cost_limit; |
52786026 | 2464 | |
7b13ee6b | 2465 | /* Compute the hash index. */ |
f40751dd | 2466 | hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |
7b13ee6b KH |
2467 | |
2468 | /* See if we already know what to do for T. */ | |
5322d07e NF |
2469 | entry_ptr = alg_hash_entry_ptr (hash_index); |
2470 | if (entry_ptr->t == t | |
2471 | && entry_ptr->mode == mode | |
2472 | && entry_ptr->mode == mode | |
2473 | && entry_ptr->speed == speed | |
2474 | && entry_ptr->alg != alg_unknown) | |
7b13ee6b | 2475 | { |
5322d07e | 2476 | cache_alg = entry_ptr->alg; |
0178027c KH |
2477 | |
2478 | if (cache_alg == alg_impossible) | |
7b13ee6b | 2479 | { |
0178027c | 2480 | /* The cache tells us that it's impossible to synthesize |
5322d07e NF |
2481 | multiplication by T within entry_ptr->cost. */ |
2482 | if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) | |
0178027c KH |
2483 | /* COST_LIMIT is at least as restrictive as the one |
2484 | recorded in the hash table, in which case we have no | |
2485 | hope of synthesizing a multiplication. Just | |
2486 | return. */ | |
2487 | return; | |
2488 | ||
2489 | /* If we get here, COST_LIMIT is less restrictive than the | |
2490 | one recorded in the hash table, so we may be able to | |
2491 | synthesize a multiplication. Proceed as if we didn't | |
2492 | have the cache entry. */ | |
2493 | } | |
2494 | else | |
2495 | { | |
5322d07e | 2496 | if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) |
0178027c KH |
2497 | /* The cached algorithm shows that this multiplication |
2498 | requires more cost than COST_LIMIT. Just return. This | |
2499 | way, we don't clobber this cache entry with | |
2500 | alg_impossible but retain useful information. */ | |
2501 | return; | |
7b13ee6b | 2502 | |
0178027c KH |
2503 | cache_hit = true; |
2504 | ||
2505 | switch (cache_alg) | |
2506 | { | |
2507 | case alg_shift: | |
2508 | goto do_alg_shift; | |
7b13ee6b | 2509 | |
0178027c KH |
2510 | case alg_add_t_m2: |
2511 | case alg_sub_t_m2: | |
2512 | goto do_alg_addsub_t_m2; | |
7b13ee6b | 2513 | |
0178027c KH |
2514 | case alg_add_factor: |
2515 | case alg_sub_factor: | |
2516 | goto do_alg_addsub_factor; | |
7b13ee6b | 2517 | |
0178027c KH |
2518 | case alg_add_t2_m: |
2519 | goto do_alg_add_t2_m; | |
7b13ee6b | 2520 | |
0178027c KH |
2521 | case alg_sub_t2_m: |
2522 | goto do_alg_sub_t2_m; | |
2523 | ||
2524 | default: | |
2525 | gcc_unreachable (); | |
2526 | } | |
7b13ee6b KH |
2527 | } |
2528 | } | |
2529 | ||
b385aeda RK |
2530 | /* If we have a group of zero bits at the low-order part of T, try |
2531 | multiplying by the remaining bits and then doing a shift. */ | |
2532 | ||
b2fb324c | 2533 | if ((t & 1) == 0) |
44037a66 | 2534 | { |
7b13ee6b | 2535 | do_alg_shift: |
b2fb324c | 2536 | m = floor_log2 (t & -t); /* m = number of low zero bits */ |
0792ab19 | 2537 | if (m < maxm) |
44037a66 | 2538 | { |
02a65aef | 2539 | q = t >> m; |
15bad393 RS |
2540 | /* The function expand_shift will choose between a shift and |
2541 | a sequence of additions, so the observed cost is given as | |
5322d07e NF |
2542 | MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */ |
2543 | op_cost = m * add_cost (speed, mode); | |
2544 | if (shift_cost (speed, mode, m) < op_cost) | |
2545 | op_cost = shift_cost (speed, mode, m); | |
26276705 RS |
2546 | new_limit.cost = best_cost.cost - op_cost; |
2547 | new_limit.latency = best_cost.latency - op_cost; | |
2548 | synth_mult (alg_in, q, &new_limit, mode); | |
2549 | ||
2550 | alg_in->cost.cost += op_cost; | |
2551 | alg_in->cost.latency += op_cost; | |
2552 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
02a65aef | 2553 | { |
26276705 | 2554 | best_cost = alg_in->cost; |
076701b6 | 2555 | std::swap (alg_in, best_alg); |
02a65aef R |
2556 | best_alg->log[best_alg->ops] = m; |
2557 | best_alg->op[best_alg->ops] = alg_shift; | |
02a65aef | 2558 | } |
ddc2690a KH |
2559 | |
2560 | /* See if treating ORIG_T as a signed number yields a better | |
2561 | sequence. Try this sequence only for a negative ORIG_T | |
2562 | as it would be useless for a non-negative ORIG_T. */ | |
2563 | if ((HOST_WIDE_INT) orig_t < 0) | |
2564 | { | |
2565 | /* Shift ORIG_T as follows because a right shift of a | |
2566 | negative-valued signed type is implementation | |
2567 | defined. */ | |
2568 | q = ~(~orig_t >> m); | |
2569 | /* The function expand_shift will choose between a shift | |
2570 | and a sequence of additions, so the observed cost is | |
5322d07e NF |
2571 | given as MIN (m * add_cost(speed, mode), |
2572 | shift_cost(speed, mode, m)). */ | |
2573 | op_cost = m * add_cost (speed, mode); | |
2574 | if (shift_cost (speed, mode, m) < op_cost) | |
2575 | op_cost = shift_cost (speed, mode, m); | |
ddc2690a KH |
2576 | new_limit.cost = best_cost.cost - op_cost; |
2577 | new_limit.latency = best_cost.latency - op_cost; | |
2578 | synth_mult (alg_in, q, &new_limit, mode); | |
2579 | ||
2580 | alg_in->cost.cost += op_cost; | |
2581 | alg_in->cost.latency += op_cost; | |
2582 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
2583 | { | |
ddc2690a | 2584 | best_cost = alg_in->cost; |
076701b6 | 2585 | std::swap (alg_in, best_alg); |
ddc2690a KH |
2586 | best_alg->log[best_alg->ops] = m; |
2587 | best_alg->op[best_alg->ops] = alg_shift; | |
2588 | } | |
2589 | } | |
819126a6 | 2590 | } |
7b13ee6b KH |
2591 | if (cache_hit) |
2592 | goto done; | |
819126a6 RK |
2593 | } |
2594 | ||
2595 | /* If we have an odd number, add or subtract one. */ | |
2596 | if ((t & 1) != 0) | |
2597 | { | |
2598 | unsigned HOST_WIDE_INT w; | |
2599 | ||
7b13ee6b | 2600 | do_alg_addsub_t_m2: |
819126a6 RK |
2601 | for (w = 1; (w & t) != 0; w <<= 1) |
2602 | ; | |
31031edd | 2603 | /* If T was -1, then W will be zero after the loop. This is another |
c410d49e | 2604 | case where T ends with ...111. Handling this with (T + 1) and |
31031edd JL |
2605 | subtract 1 produces slightly better code and results in algorithm |
2606 | selection much faster than treating it like the ...0111 case | |
2607 | below. */ | |
2608 | if (w == 0 | |
2609 | || (w > 2 | |
2610 | /* Reject the case where t is 3. | |
2611 | Thus we prefer addition in that case. */ | |
2612 | && t != 3)) | |
819126a6 | 2613 | { |
c61928d0 | 2614 | /* T ends with ...111. Multiply by (T + 1) and subtract T. */ |
819126a6 | 2615 | |
5322d07e | 2616 | op_cost = add_cost (speed, mode); |
26276705 RS |
2617 | new_limit.cost = best_cost.cost - op_cost; |
2618 | new_limit.latency = best_cost.latency - op_cost; | |
2619 | synth_mult (alg_in, t + 1, &new_limit, mode); | |
b2fb324c | 2620 | |
26276705 RS |
2621 | alg_in->cost.cost += op_cost; |
2622 | alg_in->cost.latency += op_cost; | |
2623 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2624 | { |
26276705 | 2625 | best_cost = alg_in->cost; |
076701b6 | 2626 | std::swap (alg_in, best_alg); |
819126a6 RK |
2627 | best_alg->log[best_alg->ops] = 0; |
2628 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
44037a66 | 2629 | } |
44037a66 | 2630 | } |
819126a6 RK |
2631 | else |
2632 | { | |
c61928d0 | 2633 | /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */ |
44037a66 | 2634 | |
5322d07e | 2635 | op_cost = add_cost (speed, mode); |
26276705 RS |
2636 | new_limit.cost = best_cost.cost - op_cost; |
2637 | new_limit.latency = best_cost.latency - op_cost; | |
2638 | synth_mult (alg_in, t - 1, &new_limit, mode); | |
819126a6 | 2639 | |
26276705 RS |
2640 | alg_in->cost.cost += op_cost; |
2641 | alg_in->cost.latency += op_cost; | |
2642 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
819126a6 | 2643 | { |
26276705 | 2644 | best_cost = alg_in->cost; |
076701b6 | 2645 | std::swap (alg_in, best_alg); |
819126a6 RK |
2646 | best_alg->log[best_alg->ops] = 0; |
2647 | best_alg->op[best_alg->ops] = alg_add_t_m2; | |
819126a6 RK |
2648 | } |
2649 | } | |
ef268d34 KH |
2650 | |
2651 | /* We may be able to calculate a * -7, a * -15, a * -31, etc | |
2652 | quickly with a - a * n for some appropriate constant n. */ | |
2653 | m = exact_log2 (-orig_t + 1); | |
2654 | if (m >= 0 && m < maxm) | |
2655 | { | |
35430ca0 KT |
2656 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
2657 | /* If the target has a cheap shift-and-subtract insn use | |
2658 | that in preference to a shift insn followed by a sub insn. | |
2659 | Assume that the shift-and-sub is "atomic" with a latency | |
2660 | equal to it's cost, otherwise assume that on superscalar | |
2661 | hardware the shift may be executed concurrently with the | |
2662 | earlier steps in the algorithm. */ | |
2663 | if (shiftsub1_cost (speed, mode, m) <= op_cost) | |
2664 | { | |
2665 | op_cost = shiftsub1_cost (speed, mode, m); | |
2666 | op_latency = op_cost; | |
2667 | } | |
2668 | else | |
2669 | op_latency = add_cost (speed, mode); | |
2670 | ||
ef268d34 | 2671 | new_limit.cost = best_cost.cost - op_cost; |
35430ca0 | 2672 | new_limit.latency = best_cost.latency - op_latency; |
84ddb681 RH |
2673 | synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, |
2674 | &new_limit, mode); | |
ef268d34 KH |
2675 | |
2676 | alg_in->cost.cost += op_cost; | |
35430ca0 | 2677 | alg_in->cost.latency += op_latency; |
ef268d34 KH |
2678 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
2679 | { | |
ef268d34 | 2680 | best_cost = alg_in->cost; |
076701b6 | 2681 | std::swap (alg_in, best_alg); |
ef268d34 KH |
2682 | best_alg->log[best_alg->ops] = m; |
2683 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
2684 | } | |
2685 | } | |
2686 | ||
7b13ee6b KH |
2687 | if (cache_hit) |
2688 | goto done; | |
819126a6 | 2689 | } |
63610db9 | 2690 | |
44037a66 | 2691 | /* Look for factors of t of the form |
7963ac37 | 2692 | t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |
44037a66 | 2693 | If we find such a factor, we can multiply by t using an algorithm that |
7963ac37 | 2694 | multiplies by q, shift the result by m and add/subtract it to itself. |
44037a66 | 2695 | |
7963ac37 RK |
2696 | We search for large factors first and loop down, even if large factors |
2697 | are less probable than small; if we find a large factor we will find a | |
2698 | good sequence quickly, and therefore be able to prune (by decreasing | |
2699 | COST_LIMIT) the search. */ | |
2700 | ||
7b13ee6b | 2701 | do_alg_addsub_factor: |
7963ac37 | 2702 | for (m = floor_log2 (t - 1); m >= 2; m--) |
44037a66 | 2703 | { |
7963ac37 | 2704 | unsigned HOST_WIDE_INT d; |
44037a66 | 2705 | |
7963ac37 | 2706 | d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; |
7b13ee6b KH |
2707 | if (t % d == 0 && t > d && m < maxm |
2708 | && (!cache_hit || cache_alg == alg_add_factor)) | |
44037a66 | 2709 | { |
5322d07e | 2710 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
35430ca0 KT |
2711 | if (shiftadd_cost (speed, mode, m) <= op_cost) |
2712 | op_cost = shiftadd_cost (speed, mode, m); | |
2713 | ||
2714 | op_latency = op_cost; | |
2715 | ||
26276705 RS |
2716 | |
2717 | new_limit.cost = best_cost.cost - op_cost; | |
2718 | new_limit.latency = best_cost.latency - op_latency; | |
2719 | synth_mult (alg_in, t / d, &new_limit, mode); | |
44037a66 | 2720 | |
26276705 RS |
2721 | alg_in->cost.cost += op_cost; |
2722 | alg_in->cost.latency += op_latency; | |
2723 | if (alg_in->cost.latency < op_cost) | |
2724 | alg_in->cost.latency = op_cost; | |
2725 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2726 | { |
26276705 | 2727 | best_cost = alg_in->cost; |
076701b6 | 2728 | std::swap (alg_in, best_alg); |
b385aeda | 2729 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2730 | best_alg->op[best_alg->ops] = alg_add_factor; |
44037a66 | 2731 | } |
c0b262c1 TG |
2732 | /* Other factors will have been taken care of in the recursion. */ |
2733 | break; | |
44037a66 TG |
2734 | } |
2735 | ||
7963ac37 | 2736 | d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; |
7b13ee6b KH |
2737 | if (t % d == 0 && t > d && m < maxm |
2738 | && (!cache_hit || cache_alg == alg_sub_factor)) | |
44037a66 | 2739 | { |
5322d07e | 2740 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
35430ca0 KT |
2741 | if (shiftsub0_cost (speed, mode, m) <= op_cost) |
2742 | op_cost = shiftsub0_cost (speed, mode, m); | |
2743 | ||
2744 | op_latency = op_cost; | |
26276705 RS |
2745 | |
2746 | new_limit.cost = best_cost.cost - op_cost; | |
417c735c | 2747 | new_limit.latency = best_cost.latency - op_latency; |
26276705 | 2748 | synth_mult (alg_in, t / d, &new_limit, mode); |
44037a66 | 2749 | |
26276705 RS |
2750 | alg_in->cost.cost += op_cost; |
2751 | alg_in->cost.latency += op_latency; | |
2752 | if (alg_in->cost.latency < op_cost) | |
2753 | alg_in->cost.latency = op_cost; | |
2754 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2755 | { |
26276705 | 2756 | best_cost = alg_in->cost; |
076701b6 | 2757 | std::swap (alg_in, best_alg); |
b385aeda | 2758 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2759 | best_alg->op[best_alg->ops] = alg_sub_factor; |
44037a66 | 2760 | } |
c0b262c1 | 2761 | break; |
44037a66 TG |
2762 | } |
2763 | } | |
7b13ee6b KH |
2764 | if (cache_hit) |
2765 | goto done; | |
44037a66 | 2766 | |
7963ac37 RK |
2767 | /* Try shift-and-add (load effective address) instructions, |
2768 | i.e. do a*3, a*5, a*9. */ | |
2769 | if ((t & 1) != 0) | |
2770 | { | |
7b13ee6b | 2771 | do_alg_add_t2_m: |
7963ac37 RK |
2772 | q = t - 1; |
2773 | q = q & -q; | |
2774 | m = exact_log2 (q); | |
0792ab19 | 2775 | if (m >= 0 && m < maxm) |
b385aeda | 2776 | { |
5322d07e | 2777 | op_cost = shiftadd_cost (speed, mode, m); |
26276705 RS |
2778 | new_limit.cost = best_cost.cost - op_cost; |
2779 | new_limit.latency = best_cost.latency - op_cost; | |
2780 | synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); | |
2781 | ||
2782 | alg_in->cost.cost += op_cost; | |
2783 | alg_in->cost.latency += op_cost; | |
2784 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb | 2785 | { |
26276705 | 2786 | best_cost = alg_in->cost; |
076701b6 | 2787 | std::swap (alg_in, best_alg); |
5eebe2eb | 2788 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2789 | best_alg->op[best_alg->ops] = alg_add_t2_m; |
5eebe2eb | 2790 | } |
7963ac37 | 2791 | } |
7b13ee6b KH |
2792 | if (cache_hit) |
2793 | goto done; | |
44037a66 | 2794 | |
7b13ee6b | 2795 | do_alg_sub_t2_m: |
7963ac37 RK |
2796 | q = t + 1; |
2797 | q = q & -q; | |
2798 | m = exact_log2 (q); | |
0792ab19 | 2799 | if (m >= 0 && m < maxm) |
b385aeda | 2800 | { |
5322d07e | 2801 | op_cost = shiftsub0_cost (speed, mode, m); |
26276705 RS |
2802 | new_limit.cost = best_cost.cost - op_cost; |
2803 | new_limit.latency = best_cost.latency - op_cost; | |
2804 | synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); | |
2805 | ||
2806 | alg_in->cost.cost += op_cost; | |
2807 | alg_in->cost.latency += op_cost; | |
2808 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb | 2809 | { |
26276705 | 2810 | best_cost = alg_in->cost; |
076701b6 | 2811 | std::swap (alg_in, best_alg); |
5eebe2eb | 2812 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2813 | best_alg->op[best_alg->ops] = alg_sub_t2_m; |
5eebe2eb | 2814 | } |
7963ac37 | 2815 | } |
7b13ee6b KH |
2816 | if (cache_hit) |
2817 | goto done; | |
7963ac37 | 2818 | } |
44037a66 | 2819 | |
7b13ee6b | 2820 | done: |
3ab0f290 DJ |
2821 | /* If best_cost has not decreased, we have not found any algorithm. */ |
2822 | if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) | |
0178027c KH |
2823 | { |
2824 | /* We failed to find an algorithm. Record alg_impossible for | |
2825 | this case (that is, <T, MODE, COST_LIMIT>) so that next time | |
2826 | we are asked to find an algorithm for T within the same or | |
2827 | lower COST_LIMIT, we can immediately return to the | |
2828 | caller. */ | |
5322d07e NF |
2829 | entry_ptr->t = t; |
2830 | entry_ptr->mode = mode; | |
2831 | entry_ptr->speed = speed; | |
2832 | entry_ptr->alg = alg_impossible; | |
2833 | entry_ptr->cost = *cost_limit; | |
0178027c KH |
2834 | return; |
2835 | } | |
3ab0f290 | 2836 | |
7b13ee6b KH |
2837 | /* Cache the result. */ |
2838 | if (!cache_hit) | |
2839 | { | |
5322d07e NF |
2840 | entry_ptr->t = t; |
2841 | entry_ptr->mode = mode; | |
2842 | entry_ptr->speed = speed; | |
2843 | entry_ptr->alg = best_alg->op[best_alg->ops]; | |
2844 | entry_ptr->cost.cost = best_cost.cost; | |
2845 | entry_ptr->cost.latency = best_cost.latency; | |
7b13ee6b KH |
2846 | } |
2847 | ||
52786026 RK |
2848 | /* If we are getting a too long sequence for `struct algorithm' |
2849 | to record, make this search fail. */ | |
2850 | if (best_alg->ops == MAX_BITS_PER_WORD) | |
2851 | return; | |
2852 | ||
819126a6 RK |
2853 | /* Copy the algorithm from temporary space to the space at alg_out. |
2854 | We avoid using structure assignment because the majority of | |
2855 | best_alg is normally undefined, and this is a critical function. */ | |
2856 | alg_out->ops = best_alg->ops + 1; | |
26276705 | 2857 | alg_out->cost = best_cost; |
4e135bdd KG |
2858 | memcpy (alg_out->op, best_alg->op, |
2859 | alg_out->ops * sizeof *alg_out->op); | |
2860 | memcpy (alg_out->log, best_alg->log, | |
2861 | alg_out->ops * sizeof *alg_out->log); | |
44037a66 TG |
2862 | } |
2863 | \f | |
d1a6adeb | 2864 | /* Find the cheapest way of multiplying a value of mode MODE by VAL. |
8efc8980 RS |
2865 | Try three variations: |
2866 | ||
2867 | - a shift/add sequence based on VAL itself | |
2868 | - a shift/add sequence based on -VAL, followed by a negation | |
2869 | - a shift/add sequence based on VAL - 1, followed by an addition. | |
2870 | ||
f258e38b UW |
2871 | Return true if the cheapest of these cost less than MULT_COST, |
2872 | describing the algorithm in *ALG and final fixup in *VARIANT. */ | |
8efc8980 RS |
2873 | |
2874 | static bool | |
ef4bddc2 | 2875 | choose_mult_variant (machine_mode mode, HOST_WIDE_INT val, |
f258e38b UW |
2876 | struct algorithm *alg, enum mult_variant *variant, |
2877 | int mult_cost) | |
8efc8980 | 2878 | { |
8efc8980 | 2879 | struct algorithm alg2; |
26276705 RS |
2880 | struct mult_cost limit; |
2881 | int op_cost; | |
f40751dd | 2882 | bool speed = optimize_insn_for_speed_p (); |
8efc8980 | 2883 | |
18eaea7f RS |
2884 | /* Fail quickly for impossible bounds. */ |
2885 | if (mult_cost < 0) | |
2886 | return false; | |
2887 | ||
2888 | /* Ensure that mult_cost provides a reasonable upper bound. | |
2889 | Any constant multiplication can be performed with less | |
2890 | than 2 * bits additions. */ | |
5322d07e | 2891 | op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); |
18eaea7f RS |
2892 | if (mult_cost > op_cost) |
2893 | mult_cost = op_cost; | |
2894 | ||
8efc8980 | 2895 | *variant = basic_variant; |
26276705 RS |
2896 | limit.cost = mult_cost; |
2897 | limit.latency = mult_cost; | |
2898 | synth_mult (alg, val, &limit, mode); | |
8efc8980 RS |
2899 | |
2900 | /* This works only if the inverted value actually fits in an | |
2901 | `unsigned int' */ | |
84ddb681 | 2902 | if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) |
8efc8980 | 2903 | { |
c3284718 | 2904 | op_cost = neg_cost (speed, mode); |
26276705 RS |
2905 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |
2906 | { | |
2907 | limit.cost = alg->cost.cost - op_cost; | |
2908 | limit.latency = alg->cost.latency - op_cost; | |
2909 | } | |
2910 | else | |
2911 | { | |
2912 | limit.cost = mult_cost - op_cost; | |
2913 | limit.latency = mult_cost - op_cost; | |
2914 | } | |
2915 | ||
2916 | synth_mult (&alg2, -val, &limit, mode); | |
2917 | alg2.cost.cost += op_cost; | |
2918 | alg2.cost.latency += op_cost; | |
2919 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) | |
8efc8980 RS |
2920 | *alg = alg2, *variant = negate_variant; |
2921 | } | |
2922 | ||
2923 | /* This proves very useful for division-by-constant. */ | |
5322d07e | 2924 | op_cost = add_cost (speed, mode); |
26276705 RS |
2925 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |
2926 | { | |
2927 | limit.cost = alg->cost.cost - op_cost; | |
2928 | limit.latency = alg->cost.latency - op_cost; | |
2929 | } | |
2930 | else | |
2931 | { | |
2932 | limit.cost = mult_cost - op_cost; | |
2933 | limit.latency = mult_cost - op_cost; | |
2934 | } | |
2935 | ||
2936 | synth_mult (&alg2, val - 1, &limit, mode); | |
2937 | alg2.cost.cost += op_cost; | |
2938 | alg2.cost.latency += op_cost; | |
2939 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) | |
8efc8980 RS |
2940 | *alg = alg2, *variant = add_variant; |
2941 | ||
26276705 | 2942 | return MULT_COST_LESS (&alg->cost, mult_cost); |
8efc8980 RS |
2943 | } |
2944 | ||
2945 | /* A subroutine of expand_mult, used for constant multiplications. | |
2946 | Multiply OP0 by VAL in mode MODE, storing the result in TARGET if | |
2947 | convenient. Use the shift/add sequence described by ALG and apply | |
2948 | the final fixup specified by VARIANT. */ | |
2949 | ||
2950 | static rtx | |
ef4bddc2 | 2951 | expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val, |
8efc8980 RS |
2952 | rtx target, const struct algorithm *alg, |
2953 | enum mult_variant variant) | |
2954 | { | |
2955 | HOST_WIDE_INT val_so_far; | |
f3f6fb16 DM |
2956 | rtx_insn *insn; |
2957 | rtx accum, tem; | |
8efc8980 | 2958 | int opno; |
ef4bddc2 | 2959 | machine_mode nmode; |
8efc8980 | 2960 | |
d448860e JH |
2961 | /* Avoid referencing memory over and over and invalid sharing |
2962 | on SUBREGs. */ | |
2963 | op0 = force_reg (mode, op0); | |
8efc8980 RS |
2964 | |
2965 | /* ACCUM starts out either as OP0 or as a zero, depending on | |
2966 | the first operation. */ | |
2967 | ||
2968 | if (alg->op[0] == alg_zero) | |
2969 | { | |
84ddb681 | 2970 | accum = copy_to_mode_reg (mode, CONST0_RTX (mode)); |
8efc8980 RS |
2971 | val_so_far = 0; |
2972 | } | |
2973 | else if (alg->op[0] == alg_m) | |
2974 | { | |
2975 | accum = copy_to_mode_reg (mode, op0); | |
2976 | val_so_far = 1; | |
2977 | } | |
2978 | else | |
5b0264cb | 2979 | gcc_unreachable (); |
8efc8980 RS |
2980 | |
2981 | for (opno = 1; opno < alg->ops; opno++) | |
2982 | { | |
2983 | int log = alg->log[opno]; | |
7c27e184 | 2984 | rtx shift_subtarget = optimize ? 0 : accum; |
8efc8980 RS |
2985 | rtx add_target |
2986 | = (opno == alg->ops - 1 && target != 0 && variant != add_variant | |
7c27e184 | 2987 | && !optimize) |
8efc8980 | 2988 | ? target : 0; |
7c27e184 | 2989 | rtx accum_target = optimize ? 0 : accum; |
7543f918 | 2990 | rtx accum_inner; |
8efc8980 RS |
2991 | |
2992 | switch (alg->op[opno]) | |
2993 | { | |
2994 | case alg_shift: | |
eb6c3df1 | 2995 | tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); |
4caa21a1 UB |
2996 | /* REG_EQUAL note will be attached to the following insn. */ |
2997 | emit_move_insn (accum, tem); | |
8efc8980 RS |
2998 | val_so_far <<= log; |
2999 | break; | |
3000 | ||
3001 | case alg_add_t_m2: | |
eb6c3df1 | 3002 | tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); |
8efc8980 RS |
3003 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
3004 | add_target ? add_target : accum_target); | |
3005 | val_so_far += (HOST_WIDE_INT) 1 << log; | |
3006 | break; | |
3007 | ||
3008 | case alg_sub_t_m2: | |
eb6c3df1 | 3009 | tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); |
8efc8980 RS |
3010 | accum = force_operand (gen_rtx_MINUS (mode, accum, tem), |
3011 | add_target ? add_target : accum_target); | |
3012 | val_so_far -= (HOST_WIDE_INT) 1 << log; | |
3013 | break; | |
3014 | ||
3015 | case alg_add_t2_m: | |
3016 | accum = expand_shift (LSHIFT_EXPR, mode, accum, | |
eb6c3df1 | 3017 | log, shift_subtarget, 0); |
8efc8980 RS |
3018 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), |
3019 | add_target ? add_target : accum_target); | |
3020 | val_so_far = (val_so_far << log) + 1; | |
3021 | break; | |
3022 | ||
3023 | case alg_sub_t2_m: | |
3024 | accum = expand_shift (LSHIFT_EXPR, mode, accum, | |
eb6c3df1 | 3025 | log, shift_subtarget, 0); |
8efc8980 RS |
3026 | accum = force_operand (gen_rtx_MINUS (mode, accum, op0), |
3027 | add_target ? add_target : accum_target); | |
3028 | val_so_far = (val_so_far << log) - 1; | |
3029 | break; | |
3030 | ||
3031 | case alg_add_factor: | |
eb6c3df1 | 3032 | tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); |
8efc8980 RS |
3033 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
3034 | add_target ? add_target : accum_target); | |
3035 | val_so_far += val_so_far << log; | |
3036 | break; | |
3037 | ||
3038 | case alg_sub_factor: | |
eb6c3df1 | 3039 | tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); |
8efc8980 | 3040 | accum = force_operand (gen_rtx_MINUS (mode, tem, accum), |
7c27e184 PB |
3041 | (add_target |
3042 | ? add_target : (optimize ? 0 : tem))); | |
8efc8980 RS |
3043 | val_so_far = (val_so_far << log) - val_so_far; |
3044 | break; | |
3045 | ||
3046 | default: | |
5b0264cb | 3047 | gcc_unreachable (); |
8efc8980 RS |
3048 | } |
3049 | ||
84ddb681 | 3050 | if (SCALAR_INT_MODE_P (mode)) |
8efc8980 | 3051 | { |
84ddb681 RH |
3052 | /* Write a REG_EQUAL note on the last insn so that we can cse |
3053 | multiplication sequences. Note that if ACCUM is a SUBREG, | |
3054 | we've set the inner register and must properly indicate that. */ | |
3055 | tem = op0, nmode = mode; | |
3056 | accum_inner = accum; | |
3057 | if (GET_CODE (accum) == SUBREG) | |
3058 | { | |
3059 | accum_inner = SUBREG_REG (accum); | |
3060 | nmode = GET_MODE (accum_inner); | |
3061 | tem = gen_lowpart (nmode, op0); | |
3062 | } | |
8efc8980 | 3063 | |
84ddb681 RH |
3064 | insn = get_last_insn (); |
3065 | set_dst_reg_note (insn, REG_EQUAL, | |
4789c0ce RS |
3066 | gen_rtx_MULT (nmode, tem, |
3067 | gen_int_mode (val_so_far, nmode)), | |
84ddb681 RH |
3068 | accum_inner); |
3069 | } | |
8efc8980 RS |
3070 | } |
3071 | ||
3072 | if (variant == negate_variant) | |
3073 | { | |
3074 | val_so_far = -val_so_far; | |
3075 | accum = expand_unop (mode, neg_optab, accum, target, 0); | |
3076 | } | |
3077 | else if (variant == add_variant) | |
3078 | { | |
3079 | val_so_far = val_so_far + 1; | |
3080 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); | |
3081 | } | |
3082 | ||
42eb30b5 ZW |
3083 | /* Compare only the bits of val and val_so_far that are significant |
3084 | in the result mode, to avoid sign-/zero-extension confusion. */ | |
84ddb681 RH |
3085 | nmode = GET_MODE_INNER (mode); |
3086 | if (nmode == VOIDmode) | |
3087 | nmode = mode; | |
3088 | val &= GET_MODE_MASK (nmode); | |
3089 | val_so_far &= GET_MODE_MASK (nmode); | |
5b0264cb | 3090 | gcc_assert (val == val_so_far); |
8efc8980 RS |
3091 | |
3092 | return accum; | |
3093 | } | |
3094 | ||
44037a66 TG |
3095 | /* Perform a multiplication and return an rtx for the result. |
3096 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); | |
3097 | TARGET is a suggestion for where to store the result (an rtx). | |
3098 | ||
3099 | We check specially for a constant integer as OP1. | |
3100 | If you want this check for OP0 as well, then before calling | |
3101 | you should swap the two operands if OP0 would be constant. */ | |
3102 | ||
3103 | rtx | |
ef4bddc2 | 3104 | expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target, |
f2593a66 | 3105 | int unsignedp) |
44037a66 | 3106 | { |
8efc8980 RS |
3107 | enum mult_variant variant; |
3108 | struct algorithm algorithm; | |
84ddb681 | 3109 | rtx scalar_op1; |
65dc9350 | 3110 | int max_cost; |
f40751dd | 3111 | bool speed = optimize_insn_for_speed_p (); |
84ddb681 | 3112 | bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp; |
44037a66 | 3113 | |
84ddb681 | 3114 | if (CONSTANT_P (op0)) |
4c278134 | 3115 | std::swap (op0, op1); |
84ddb681 RH |
3116 | |
3117 | /* For vectors, there are several simplifications that can be made if | |
3118 | all elements of the vector constant are identical. */ | |
3119 | scalar_op1 = op1; | |
3120 | if (GET_CODE (op1) == CONST_VECTOR) | |
3121 | { | |
3122 | int i, n = CONST_VECTOR_NUNITS (op1); | |
3123 | scalar_op1 = CONST_VECTOR_ELT (op1, 0); | |
3124 | for (i = 1; i < n; ++i) | |
3125 | if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i))) | |
3126 | goto skip_scalar; | |
3127 | } | |
3128 | ||
3129 | if (INTEGRAL_MODE_P (mode)) | |
3130 | { | |
3131 | rtx fake_reg; | |
caf62455 JDA |
3132 | HOST_WIDE_INT coeff; |
3133 | bool is_neg; | |
84ddb681 RH |
3134 | int mode_bitsize; |
3135 | ||
3136 | if (op1 == CONST0_RTX (mode)) | |
3137 | return op1; | |
3138 | if (op1 == CONST1_RTX (mode)) | |
3139 | return op0; | |
3140 | if (op1 == CONSTM1_RTX (mode)) | |
3141 | return expand_unop (mode, do_trapv ? negv_optab : neg_optab, | |
3142 | op0, target, 0); | |
3143 | ||
3144 | if (do_trapv) | |
3145 | goto skip_synth; | |
3146 | ||
66b3ed5f JJ |
3147 | /* If mode is integer vector mode, check if the backend supports |
3148 | vector lshift (by scalar or vector) at all. If not, we can't use | |
3149 | synthetized multiply. */ | |
3150 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT | |
3151 | && optab_handler (vashl_optab, mode) == CODE_FOR_nothing | |
3152 | && optab_handler (ashl_optab, mode) == CODE_FOR_nothing) | |
3153 | goto skip_synth; | |
3154 | ||
84ddb681 RH |
3155 | /* These are the operations that are potentially turned into |
3156 | a sequence of shifts and additions. */ | |
3157 | mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); | |
65dc9350 RS |
3158 | |
3159 | /* synth_mult does an `unsigned int' multiply. As long as the mode is | |
3160 | less than or equal in size to `unsigned int' this doesn't matter. | |
3161 | If the mode is larger than `unsigned int', then synth_mult works | |
3162 | only if the constant value exactly fits in an `unsigned int' without | |
3163 | any truncation. This means that multiplying by negative values does | |
3164 | not work; results are off by 2^32 on a 32 bit machine. */ | |
84ddb681 | 3165 | if (CONST_INT_P (scalar_op1)) |
58b42e19 | 3166 | { |
84ddb681 RH |
3167 | coeff = INTVAL (scalar_op1); |
3168 | is_neg = coeff < 0; | |
65dc9350 | 3169 | } |
807e902e KZ |
3170 | #if TARGET_SUPPORTS_WIDE_INT |
3171 | else if (CONST_WIDE_INT_P (scalar_op1)) | |
3172 | #else | |
48175537 | 3173 | else if (CONST_DOUBLE_AS_INT_P (scalar_op1)) |
807e902e | 3174 | #endif |
65dc9350 | 3175 | { |
807e902e KZ |
3176 | int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode)); |
3177 | /* Perfect power of 2 (other than 1, which is handled above). */ | |
3178 | if (shift > 0) | |
3179 | return expand_shift (LSHIFT_EXPR, mode, op0, | |
3180 | shift, target, unsignedp); | |
caf62455 JDA |
3181 | else |
3182 | goto skip_synth; | |
65dc9350 | 3183 | } |
84ddb681 RH |
3184 | else |
3185 | goto skip_synth; | |
b8698a0f | 3186 | |
65dc9350 RS |
3187 | /* We used to test optimize here, on the grounds that it's better to |
3188 | produce a smaller program when -O is not used. But this causes | |
3189 | such a terrible slowdown sometimes that it seems better to always | |
3190 | use synth_mult. */ | |
65dc9350 | 3191 | |
84ddb681 | 3192 | /* Special case powers of two. */ |
be63b77d JJ |
3193 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff) |
3194 | && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)) | |
84ddb681 RH |
3195 | return expand_shift (LSHIFT_EXPR, mode, op0, |
3196 | floor_log2 (coeff), target, unsignedp); | |
3197 | ||
3198 | fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); | |
3199 | ||
3200 | /* Attempt to handle multiplication of DImode values by negative | |
3201 | coefficients, by performing the multiplication by a positive | |
3202 | multiplier and then inverting the result. */ | |
84ddb681 RH |
3203 | if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) |
3204 | { | |
3205 | /* Its safe to use -coeff even for INT_MIN, as the | |
3206 | result is interpreted as an unsigned coefficient. | |
3207 | Exclude cost of op0 from max_cost to match the cost | |
5e839bc8 | 3208 | calculation of the synth_mult. */ |
be63b77d | 3209 | coeff = -(unsigned HOST_WIDE_INT) coeff; |
84ddb681 | 3210 | max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed) |
c3284718 | 3211 | - neg_cost (speed, mode)); |
be63b77d JJ |
3212 | if (max_cost <= 0) |
3213 | goto skip_synth; | |
3214 | ||
3215 | /* Special case powers of two. */ | |
3216 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) | |
3217 | { | |
3218 | rtx temp = expand_shift (LSHIFT_EXPR, mode, op0, | |
3219 | floor_log2 (coeff), target, unsignedp); | |
3220 | return expand_unop (mode, neg_optab, temp, target, 0); | |
3221 | } | |
3222 | ||
3223 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, | |
3224 | max_cost)) | |
84ddb681 | 3225 | { |
be63b77d | 3226 | rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX, |
84ddb681 RH |
3227 | &algorithm, variant); |
3228 | return expand_unop (mode, neg_optab, temp, target, 0); | |
3229 | } | |
b216b86b | 3230 | goto skip_synth; |
58b42e19 | 3231 | } |
44037a66 | 3232 | |
84ddb681 RH |
3233 | /* Exclude cost of op0 from max_cost to match the cost |
3234 | calculation of the synth_mult. */ | |
3235 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed); | |
3236 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) | |
3237 | return expand_mult_const (mode, op0, coeff, target, | |
3238 | &algorithm, variant); | |
f2593a66 | 3239 | } |
84ddb681 | 3240 | skip_synth: |
f2593a66 RS |
3241 | |
3242 | /* Expand x*2.0 as x+x. */ | |
48175537 | 3243 | if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)) |
f2593a66 RS |
3244 | { |
3245 | REAL_VALUE_TYPE d; | |
84ddb681 | 3246 | REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1); |
f2593a66 RS |
3247 | |
3248 | if (REAL_VALUES_EQUAL (d, dconst2)) | |
3249 | { | |
3250 | op0 = force_reg (GET_MODE (op0), op0); | |
3251 | return expand_binop (mode, add_optab, op0, op0, | |
3252 | target, unsignedp, OPTAB_LIB_WIDEN); | |
3253 | } | |
3254 | } | |
84ddb681 | 3255 | skip_scalar: |
f2593a66 | 3256 | |
819126a6 RK |
3257 | /* This used to use umul_optab if unsigned, but for non-widening multiply |
3258 | there is no difference between signed and unsigned. */ | |
84ddb681 | 3259 | op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab, |
44037a66 | 3260 | op0, op1, target, unsignedp, OPTAB_LIB_WIDEN); |
5b0264cb | 3261 | gcc_assert (op0); |
44037a66 TG |
3262 | return op0; |
3263 | } | |
5b58b39b | 3264 | |
6dd8f4bb BS |
3265 | /* Return a cost estimate for multiplying a register by the given |
3266 | COEFFicient in the given MODE and SPEED. */ | |
3267 | ||
3268 | int | |
ef4bddc2 | 3269 | mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) |
6dd8f4bb BS |
3270 | { |
3271 | int max_cost; | |
3272 | struct algorithm algorithm; | |
3273 | enum mult_variant variant; | |
3274 | ||
3275 | rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); | |
3276 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed); | |
3277 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) | |
3278 | return algorithm.cost.cost; | |
3279 | else | |
3280 | return max_cost; | |
3281 | } | |
3282 | ||
5b58b39b BS |
3283 | /* Perform a widening multiplication and return an rtx for the result. |
3284 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); | |
3285 | TARGET is a suggestion for where to store the result (an rtx). | |
3286 | THIS_OPTAB is the optab we should use, it must be either umul_widen_optab | |
3287 | or smul_widen_optab. | |
3288 | ||
3289 | We check specially for a constant integer as OP1, comparing the | |
3290 | cost of a widening multiply against the cost of a sequence of shifts | |
3291 | and adds. */ | |
3292 | ||
3293 | rtx | |
ef4bddc2 | 3294 | expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target, |
5b58b39b BS |
3295 | int unsignedp, optab this_optab) |
3296 | { | |
3297 | bool speed = optimize_insn_for_speed_p (); | |
e7ef91dc | 3298 | rtx cop1; |
5b58b39b BS |
3299 | |
3300 | if (CONST_INT_P (op1) | |
e7ef91dc JJ |
3301 | && GET_MODE (op0) != VOIDmode |
3302 | && (cop1 = convert_modes (mode, GET_MODE (op0), op1, | |
3303 | this_optab == umul_widen_optab)) | |
3304 | && CONST_INT_P (cop1) | |
3305 | && (INTVAL (cop1) >= 0 | |
46c9550f | 3306 | || HWI_COMPUTABLE_MODE_P (mode))) |
5b58b39b | 3307 | { |
e7ef91dc | 3308 | HOST_WIDE_INT coeff = INTVAL (cop1); |
5b58b39b BS |
3309 | int max_cost; |
3310 | enum mult_variant variant; | |
3311 | struct algorithm algorithm; | |
3312 | ||
e9082138 JJ |
3313 | if (coeff == 0) |
3314 | return CONST0_RTX (mode); | |
3315 | ||
5b58b39b BS |
3316 | /* Special case powers of two. */ |
3317 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) | |
3318 | { | |
3319 | op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); | |
3320 | return expand_shift (LSHIFT_EXPR, mode, op0, | |
eb6c3df1 | 3321 | floor_log2 (coeff), target, unsignedp); |
5b58b39b BS |
3322 | } |
3323 | ||
3324 | /* Exclude cost of op0 from max_cost to match the cost | |
3325 | calculation of the synth_mult. */ | |
5322d07e | 3326 | max_cost = mul_widen_cost (speed, mode); |
5b58b39b BS |
3327 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, |
3328 | max_cost)) | |
3329 | { | |
3330 | op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); | |
3331 | return expand_mult_const (mode, op0, coeff, target, | |
3332 | &algorithm, variant); | |
3333 | } | |
3334 | } | |
3335 | return expand_binop (mode, this_optab, op0, op1, target, | |
3336 | unsignedp, OPTAB_LIB_WIDEN); | |
3337 | } | |
44037a66 | 3338 | \f |
55c2d311 TG |
3339 | /* Choose a minimal N + 1 bit approximation to 1/D that can be used to |
3340 | replace division by D, and put the least significant N bits of the result | |
3341 | in *MULTIPLIER_PTR and return the most significant bit. | |
3342 | ||
3343 | The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the | |
3344 | needed precision is in PRECISION (should be <= N). | |
3345 | ||
3346 | PRECISION should be as small as possible so this function can choose | |
3347 | multiplier more freely. | |
3348 | ||
3349 | The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that | |
3350 | is to be used for a final right shift is placed in *POST_SHIFT_PTR. | |
3351 | ||
3352 | Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), | |
3353 | where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */ | |
3354 | ||
55c2d311 | 3355 | unsigned HOST_WIDE_INT |
502b8322 | 3356 | choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, |
079c527f JJ |
3357 | unsigned HOST_WIDE_INT *multiplier_ptr, |
3358 | int *post_shift_ptr, int *lgup_ptr) | |
55c2d311 | 3359 | { |
55c2d311 TG |
3360 | int lgup, post_shift; |
3361 | int pow, pow2; | |
55c2d311 TG |
3362 | |
3363 | /* lgup = ceil(log2(divisor)); */ | |
3364 | lgup = ceil_log2 (d); | |
3365 | ||
5b0264cb | 3366 | gcc_assert (lgup <= n); |
55c2d311 TG |
3367 | |
3368 | pow = n + lgup; | |
3369 | pow2 = n + lgup - precision; | |
3370 | ||
55c2d311 | 3371 | /* mlow = 2^(N + lgup)/d */ |
807e902e KZ |
3372 | wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT); |
3373 | wide_int mlow = wi::udiv_trunc (val, d); | |
55c2d311 | 3374 | |
9be0ac8c | 3375 | /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */ |
807e902e KZ |
3376 | val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT); |
3377 | wide_int mhigh = wi::udiv_trunc (val, d); | |
55c2d311 TG |
3378 | |
3379 | /* If precision == N, then mlow, mhigh exceed 2^N | |
3380 | (but they do not exceed 2^(N+1)). */ | |
3381 | ||
f9da5064 | 3382 | /* Reduce to lowest terms. */ |
55c2d311 TG |
3383 | for (post_shift = lgup; post_shift > 0; post_shift--) |
3384 | { | |
807e902e KZ |
3385 | unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1, |
3386 | HOST_BITS_PER_WIDE_INT); | |
3387 | unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1, | |
3388 | HOST_BITS_PER_WIDE_INT); | |
55c2d311 TG |
3389 | if (ml_lo >= mh_lo) |
3390 | break; | |
3391 | ||
807e902e KZ |
3392 | mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT); |
3393 | mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT); | |
55c2d311 TG |
3394 | } |
3395 | ||
3396 | *post_shift_ptr = post_shift; | |
3397 | *lgup_ptr = lgup; | |
3398 | if (n < HOST_BITS_PER_WIDE_INT) | |
3399 | { | |
3400 | unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1; | |
807e902e KZ |
3401 | *multiplier_ptr = mhigh.to_uhwi () & mask; |
3402 | return mhigh.to_uhwi () >= mask; | |
55c2d311 TG |
3403 | } |
3404 | else | |
3405 | { | |
807e902e KZ |
3406 | *multiplier_ptr = mhigh.to_uhwi (); |
3407 | return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1); | |
55c2d311 TG |
3408 | } |
3409 | } | |
3410 | ||
3411 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is | |
3412 | congruent to 1 (mod 2**N). */ | |
3413 | ||
3414 | static unsigned HOST_WIDE_INT | |
502b8322 | 3415 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |
55c2d311 | 3416 | { |
0f41302f | 3417 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |
55c2d311 TG |
3418 | |
3419 | /* The algorithm notes that the choice y = x satisfies | |
3420 | x*y == 1 mod 2^3, since x is assumed odd. | |
3421 | Each iteration doubles the number of bits of significance in y. */ | |
3422 | ||
3423 | unsigned HOST_WIDE_INT mask; | |
3424 | unsigned HOST_WIDE_INT y = x; | |
3425 | int nbit = 3; | |
3426 | ||
3427 | mask = (n == HOST_BITS_PER_WIDE_INT | |
3428 | ? ~(unsigned HOST_WIDE_INT) 0 | |
3429 | : ((unsigned HOST_WIDE_INT) 1 << n) - 1); | |
3430 | ||
3431 | while (nbit < n) | |
3432 | { | |
3433 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ | |
3434 | nbit *= 2; | |
3435 | } | |
3436 | return y; | |
3437 | } | |
3438 | ||
3439 | /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness | |
3440 | flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the | |
3441 | product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product | |
3442 | to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to | |
3443 | become signed. | |
3444 | ||
3445 | The result is put in TARGET if that is convenient. | |
3446 | ||
3447 | MODE is the mode of operation. */ | |
3448 | ||
3449 | rtx | |
ef4bddc2 | 3450 | expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0, |
502b8322 | 3451 | rtx op1, rtx target, int unsignedp) |
55c2d311 TG |
3452 | { |
3453 | rtx tem; | |
3454 | enum rtx_code adj_code = unsignedp ? PLUS : MINUS; | |
3455 | ||
3456 | tem = expand_shift (RSHIFT_EXPR, mode, op0, | |
eb6c3df1 | 3457 | GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); |
22273300 | 3458 | tem = expand_and (mode, tem, op1, NULL_RTX); |
38a448ca RH |
3459 | adj_operand |
3460 | = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), | |
3461 | adj_operand); | |
55c2d311 TG |
3462 | |
3463 | tem = expand_shift (RSHIFT_EXPR, mode, op1, | |
eb6c3df1 | 3464 | GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); |
22273300 | 3465 | tem = expand_and (mode, tem, op0, NULL_RTX); |
38a448ca RH |
3466 | target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |
3467 | target); | |
55c2d311 TG |
3468 | |
3469 | return target; | |
3470 | } | |
3471 | ||
00f07b86 | 3472 | /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ |
55c2d311 | 3473 | |
8efc8980 | 3474 | static rtx |
ef4bddc2 | 3475 | extract_high_half (machine_mode mode, rtx op) |
8efc8980 | 3476 | { |
ef4bddc2 | 3477 | machine_mode wider_mode; |
55c2d311 | 3478 | |
8efc8980 RS |
3479 | if (mode == word_mode) |
3480 | return gen_highpart (mode, op); | |
71af73bb | 3481 | |
15ed7b52 JG |
3482 | gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); |
3483 | ||
8efc8980 RS |
3484 | wider_mode = GET_MODE_WIDER_MODE (mode); |
3485 | op = expand_shift (RSHIFT_EXPR, wider_mode, op, | |
eb6c3df1 | 3486 | GET_MODE_BITSIZE (mode), 0, 1); |
8efc8980 RS |
3487 | return convert_modes (mode, wider_mode, op, 0); |
3488 | } | |
55c2d311 | 3489 | |
00f07b86 | 3490 | /* Like expmed_mult_highpart, but only consider using a multiplication |
8efc8980 RS |
3491 | optab. OP1 is an rtx for the constant operand. */ |
3492 | ||
3493 | static rtx | |
ef4bddc2 | 3494 | expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1, |
8efc8980 | 3495 | rtx target, int unsignedp, int max_cost) |
55c2d311 | 3496 | { |
665acd1e | 3497 | rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); |
ef4bddc2 | 3498 | machine_mode wider_mode; |
55c2d311 TG |
3499 | optab moptab; |
3500 | rtx tem; | |
8efc8980 | 3501 | int size; |
f40751dd | 3502 | bool speed = optimize_insn_for_speed_p (); |
55c2d311 | 3503 | |
15ed7b52 JG |
3504 | gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); |
3505 | ||
8efc8980 RS |
3506 | wider_mode = GET_MODE_WIDER_MODE (mode); |
3507 | size = GET_MODE_BITSIZE (mode); | |
55c2d311 TG |
3508 | |
3509 | /* Firstly, try using a multiplication insn that only generates the needed | |
3510 | high part of the product, and in the sign flavor of unsignedp. */ | |
5322d07e | 3511 | if (mul_highpart_cost (speed, mode) < max_cost) |
71af73bb | 3512 | { |
8efc8980 | 3513 | moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; |
665acd1e | 3514 | tem = expand_binop (mode, moptab, op0, narrow_op1, target, |
8efc8980 RS |
3515 | unsignedp, OPTAB_DIRECT); |
3516 | if (tem) | |
3517 | return tem; | |
71af73bb | 3518 | } |
55c2d311 TG |
3519 | |
3520 | /* Secondly, same as above, but use sign flavor opposite of unsignedp. | |
3521 | Need to adjust the result after the multiplication. */ | |
02a65aef | 3522 | if (size - 1 < BITS_PER_WORD |
5322d07e NF |
3523 | && (mul_highpart_cost (speed, mode) |
3524 | + 2 * shift_cost (speed, mode, size-1) | |
3525 | + 4 * add_cost (speed, mode) < max_cost)) | |
71af73bb | 3526 | { |
8efc8980 | 3527 | moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; |
665acd1e | 3528 | tem = expand_binop (mode, moptab, op0, narrow_op1, target, |
8efc8980 RS |
3529 | unsignedp, OPTAB_DIRECT); |
3530 | if (tem) | |
71af73bb | 3531 | /* We used the wrong signedness. Adjust the result. */ |
77278891 | 3532 | return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, |
8efc8980 | 3533 | tem, unsignedp); |
71af73bb | 3534 | } |
55c2d311 | 3535 | |
71af73bb | 3536 | /* Try widening multiplication. */ |
55c2d311 | 3537 | moptab = unsignedp ? umul_widen_optab : smul_widen_optab; |
4d8752f0 | 3538 | if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing |
5322d07e | 3539 | && mul_widen_cost (speed, wider_mode) < max_cost) |
a295d331 | 3540 | { |
665acd1e | 3541 | tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, |
8efc8980 RS |
3542 | unsignedp, OPTAB_WIDEN); |
3543 | if (tem) | |
3544 | return extract_high_half (mode, tem); | |
c410d49e | 3545 | } |
71af73bb TG |
3546 | |
3547 | /* Try widening the mode and perform a non-widening multiplication. */ | |
947131ba | 3548 | if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing |
02a65aef | 3549 | && size - 1 < BITS_PER_WORD |
5322d07e NF |
3550 | && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1) |
3551 | < max_cost)) | |
a295d331 | 3552 | { |
f3f6fb16 DM |
3553 | rtx_insn *insns; |
3554 | rtx wop0, wop1; | |
82dfb9a5 RS |
3555 | |
3556 | /* We need to widen the operands, for example to ensure the | |
3557 | constant multiplier is correctly sign or zero extended. | |
3558 | Use a sequence to clean-up any instructions emitted by | |
3559 | the conversions if things don't work out. */ | |
3560 | start_sequence (); | |
3561 | wop0 = convert_modes (wider_mode, mode, op0, unsignedp); | |
3562 | wop1 = convert_modes (wider_mode, mode, op1, unsignedp); | |
3563 | tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0, | |
8efc8980 | 3564 | unsignedp, OPTAB_WIDEN); |
82dfb9a5 RS |
3565 | insns = get_insns (); |
3566 | end_sequence (); | |
3567 | ||
8efc8980 | 3568 | if (tem) |
82dfb9a5 RS |
3569 | { |
3570 | emit_insn (insns); | |
3571 | return extract_high_half (mode, tem); | |
3572 | } | |
a295d331 | 3573 | } |
71af73bb TG |
3574 | |
3575 | /* Try widening multiplication of opposite signedness, and adjust. */ | |
3576 | moptab = unsignedp ? smul_widen_optab : umul_widen_optab; | |
4d8752f0 | 3577 | if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing |
02a65aef | 3578 | && size - 1 < BITS_PER_WORD |
5322d07e NF |
3579 | && (mul_widen_cost (speed, wider_mode) |
3580 | + 2 * shift_cost (speed, mode, size-1) | |
3581 | + 4 * add_cost (speed, mode) < max_cost)) | |
55c2d311 | 3582 | { |
665acd1e | 3583 | tem = expand_binop (wider_mode, moptab, op0, narrow_op1, |
71af73bb TG |
3584 | NULL_RTX, ! unsignedp, OPTAB_WIDEN); |
3585 | if (tem != 0) | |
55c2d311 | 3586 | { |
8efc8980 | 3587 | tem = extract_high_half (mode, tem); |
71af73bb | 3588 | /* We used the wrong signedness. Adjust the result. */ |
77278891 | 3589 | return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, |
71af73bb | 3590 | target, unsignedp); |
55c2d311 | 3591 | } |
55c2d311 TG |
3592 | } |
3593 | ||
71af73bb | 3594 | return 0; |
8efc8980 | 3595 | } |
71af73bb | 3596 | |
0d282692 RS |
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation can not be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  /* Reduce the constant to the mode's width so the synth-mult analysis
     below sees the value actually being multiplied.  */
  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  /* Extracting the high half of the wider product costs one full-width
     shift on top of whatever the multiplication itself costs.  */
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      /* A signed multiply by a constant with the sign bit set needs a
	 correction step (the MINUS emitted below), so account for it.  */
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      /* Do the full multiplication in WIDER_MODE using the synthesized
	 shift/add sequence, then take the high half.  */
      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  /* Shift/add synthesis was too expensive; fall back to the optab
     (or widening-multiply) strategies with the original cost budget.  */
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
0b55e932 RS |
3668 | |
3669 | ||
/* Expand signed modulus of OP0 by a power of two D in mode MODE.
   D is assumed positive; LOGD below is its base-2 logarithm.  The
   result has the sign of OP0 (truncating/C semantics of '%').  */

static rtx
expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* SIGNMASK is all-ones when OP0 is negative, zero otherwise.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  /* MASKLOW has the low LOGD bits set, i.e. D - 1.  */
	  HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      /* (x ^ signmask) - signmask conditionally negates x when
		 SIGNMASK is -1, so this computes -((-x) & (d-1)) for
		 negative x and x & (d-1) otherwise, branch-free.  */
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      /* Shift SIGNMASK right so it becomes D - 1 for negative
		 OP0 and 0 otherwise; adding it before the AND and
		 subtracting it after yields the signed remainder
		 without a branch.  */
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Branchy fallback.  Mask contains the mode's signbit and the
     significant bits of the modulus.  By including the signbit in the
     operation, many targets can avoid an explicit compare operation in
     the following comparison against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* If the masked value is non-negative it is already the remainder;
     otherwise correct it to the negative remainder below.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);

  /* Sign-extend the low LOGD bits: OR in all bits above them, then
     undo the earlier decrement.  */
  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
39cab019 RS |
3765 | |
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.

   Signed division truncates toward zero, so for negative OP0 the
   dividend must be biased by D - 1 before the arithmetic right shift.
   Four strategies are tried in order of decreasing preference,
   gated on BRANCH_COST / conditional-move availability.  */

static rtx
expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  /* Division by 2: bias is just the sign bit, obtainable with a single
     store-flag producing 0/1.  */
  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Conditional move: select between OP0 and OP0 + (D - 1) based on
     the sign, then shift.  Built in a sequence so it can be discarded
     if emit_conditional_move fails.  */
  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      end_sequence ();
    }

  /* Branch-free bias: TEMP is -1 for negative OP0, 0 otherwise; turn
     it into D - 1 either by masking with D - 1 or by an unsigned shift,
     whichever is cheaper, then add and shift.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	     > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Cheap-branch fallback: skip the D - 1 adjustment when OP0 >= 0.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
55c2d311 | 3840 | \f |
44037a66 TG |
3841 | /* Emit the code to divide OP0 by OP1, putting the result in TARGET |
3842 | if that is convenient, and returning where the result is. | |
3843 | You may request either the quotient or the remainder as the result; | |
3844 | specify REM_FLAG nonzero to get the remainder. | |
3845 | ||
3846 | CODE is the expression code for which kind of division this is; | |
3847 | it controls how rounding is done. MODE is the machine mode to use. | |
3848 | UNSIGNEDP nonzero means do unsigned division. */ | |
3849 | ||
3850 | /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI | |
3851 | and then correct it by or'ing in missing high bits | |
3852 | if result of ANDI is nonzero. | |
3853 | For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. | |
3854 | This could optimize to a bfexts instruction. | |
3855 | But C doesn't use these operations, so their optimizations are | |
3856 | left for later. */ | |
5353610b R |
3857 | /* ??? For modulo, we don't actually need the highpart of the first product, |
3858 | the low part will do nicely. And for small divisors, the second multiply | |
3859 | can also be a low-part only multiply or even be completely left out. | |
3860 | E.g. to calculate the remainder of a division by 3 with a 32 bit | |
3861 | multiply, multiply with 0x55555556 and extract the upper two bits; | |
3862 | the result is exact for inputs up to 0x1fffffff. | |
3863 | The input range can be reduced by using cross-sum rules. | |
3864 | For odd divisors >= 3, the following table gives right shift counts | |
09da1532 | 3865 | so that if a number is shifted by an integer multiple of the given |
5353610b R |
3866 | amount, the remainder stays the same: |
3867 | 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, | |
3868 | 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, | |
3869 | 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, | |
3870 | 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, | |
3871 | 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 | |
3872 | ||
3873 | Cross-sum rules for even numbers can be derived by leaving as many bits | |
3874 | to the right alone as the divisor has zeros to the right. | |
3875 | E.g. if x is an unsigned 32 bit number: | |
3876 | (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 | |
3877 | */ | |
44037a66 TG |
3878 | |
3879 | rtx | |
ef4bddc2 | 3880 | expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, |
502b8322 | 3881 | rtx op0, rtx op1, rtx target, int unsignedp) |
44037a66 | 3882 | { |
ef4bddc2 | 3883 | machine_mode compute_mode; |
b3694847 | 3884 | rtx tquotient; |
55c2d311 | 3885 | rtx quotient = 0, remainder = 0; |
f3f6fb16 | 3886 | rtx_insn *last; |
2c414fba | 3887 | int size; |
f3f6fb16 | 3888 | rtx_insn *insn; |
44037a66 | 3889 | optab optab1, optab2; |
1c4a429a | 3890 | int op1_is_constant, op1_is_pow2 = 0; |
71af73bb | 3891 | int max_cost, extra_cost; |
9ec36da5 | 3892 | static HOST_WIDE_INT last_div_const = 0; |
f40751dd | 3893 | bool speed = optimize_insn_for_speed_p (); |
55c2d311 | 3894 | |
481683e1 | 3895 | op1_is_constant = CONST_INT_P (op1); |
1c4a429a JH |
3896 | if (op1_is_constant) |
3897 | { | |
be63b77d | 3898 | unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1); |
1c4a429a JH |
3899 | if (unsignedp) |
3900 | ext_op1 &= GET_MODE_MASK (mode); | |
3901 | op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1) | |
3902 | || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1)))); | |
3903 | } | |
55c2d311 TG |
3904 | |
3905 | /* | |
3906 | This is the structure of expand_divmod: | |
3907 | ||
3908 | First comes code to fix up the operands so we can perform the operations | |
3909 | correctly and efficiently. | |
3910 | ||
3911 | Second comes a switch statement with code specific for each rounding mode. | |
3912 | For some special operands this code emits all RTL for the desired | |
69f61901 | 3913 | operation, for other cases, it generates only a quotient and stores it in |
55c2d311 TG |
3914 | QUOTIENT. The case for trunc division/remainder might leave quotient = 0, |
3915 | to indicate that it has not done anything. | |
3916 | ||
69f61901 RK |
3917 | Last comes code that finishes the operation. If QUOTIENT is set and |
3918 | REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If | |
3919 | QUOTIENT is not set, it is computed using trunc rounding. | |
44037a66 | 3920 | |
55c2d311 TG |
3921 | We try to generate special code for division and remainder when OP1 is a |
3922 | constant. If |OP1| = 2**n we can use shifts and some other fast | |
3923 | operations. For other values of OP1, we compute a carefully selected | |
3924 | fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 | |
3925 | by m. | |
3926 | ||
3927 | In all cases but EXACT_DIV_EXPR, this multiplication requires the upper | |
3928 | half of the product. Different strategies for generating the product are | |
00f07b86 | 3929 | implemented in expmed_mult_highpart. |
55c2d311 TG |
3930 | |
3931 | If what we actually want is the remainder, we generate that by another | |
3932 | by-constant multiplication and a subtraction. */ | |
3933 | ||
3934 | /* We shouldn't be called with OP1 == const1_rtx, but some of the | |
3d32ffd1 TW |
3935 | code below will malfunction if we are, so check here and handle |
3936 | the special case if so. */ | |
3937 | if (op1 == const1_rtx) | |
3938 | return rem_flag ? const0_rtx : op0; | |
3939 | ||
91ce572a CC |
3940 | /* When dividing by -1, we could get an overflow. |
3941 | negv_optab can handle overflows. */ | |
3942 | if (! unsignedp && op1 == constm1_rtx) | |
3943 | { | |
3944 | if (rem_flag) | |
0fb7aeda | 3945 | return const0_rtx; |
c3284718 | 3946 | return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT |
0fb7aeda | 3947 | ? negv_optab : neg_optab, op0, target, 0); |
91ce572a CC |
3948 | } |
3949 | ||
bc1c7e93 RK |
3950 | if (target |
3951 | /* Don't use the function value register as a target | |
3952 | since we have to read it as well as write it, | |
3953 | and function-inlining gets confused by this. */ | |
3954 | && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) | |
3955 | /* Don't clobber an operand while doing a multi-step calculation. */ | |
515dfc7a | 3956 | || ((rem_flag || op1_is_constant) |
bc1c7e93 | 3957 | && (reg_mentioned_p (target, op0) |
3c0cb5de | 3958 | || (MEM_P (op0) && MEM_P (target)))) |
bc1c7e93 | 3959 | || reg_mentioned_p (target, op1) |
3c0cb5de | 3960 | || (MEM_P (op1) && MEM_P (target)))) |
44037a66 TG |
3961 | target = 0; |
3962 | ||
44037a66 TG |
3963 | /* Get the mode in which to perform this computation. Normally it will |
3964 | be MODE, but sometimes we can't do the desired operation in MODE. | |
3965 | If so, pick a wider mode in which we can do the operation. Convert | |
3966 | to that mode at the start to avoid repeated conversions. | |
3967 | ||
3968 | First see what operations we need. These depend on the expression | |
3969 | we are evaluating. (We assume that divxx3 insns exist under the | |
3970 | same conditions that modxx3 insns and that these insns don't normally | |
3971 | fail. If these assumptions are not correct, we may generate less | |
3972 | efficient code in some cases.) | |
3973 | ||
3974 | Then see if we find a mode in which we can open-code that operation | |
3975 | (either a division, modulus, or shift). Finally, check for the smallest | |
3976 | mode for which we can do the operation with a library call. */ | |
3977 | ||
55c2d311 | 3978 | /* We might want to refine this now that we have division-by-constant |
00f07b86 | 3979 | optimization. Since expmed_mult_highpart tries so many variants, it is |
55c2d311 TG |
3980 | not straightforward to generalize this. Maybe we should make an array |
3981 | of possible modes in init_expmed? Save this for GCC 2.7. */ | |
3982 | ||
556a56ac DM |
3983 | optab1 = ((op1_is_pow2 && op1 != const0_rtx) |
3984 | ? (unsignedp ? lshr_optab : ashr_optab) | |
44037a66 | 3985 | : (unsignedp ? udiv_optab : sdiv_optab)); |
556a56ac DM |
3986 | optab2 = ((op1_is_pow2 && op1 != const0_rtx) |
3987 | ? optab1 | |
3988 | : (unsignedp ? udivmod_optab : sdivmod_optab)); | |
44037a66 TG |
3989 | |
3990 | for (compute_mode = mode; compute_mode != VOIDmode; | |
3991 | compute_mode = GET_MODE_WIDER_MODE (compute_mode)) | |
947131ba RS |
3992 | if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing |
3993 | || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) | |
44037a66 TG |
3994 | break; |
3995 | ||
3996 | if (compute_mode == VOIDmode) | |
3997 | for (compute_mode = mode; compute_mode != VOIDmode; | |
3998 | compute_mode = GET_MODE_WIDER_MODE (compute_mode)) | |
8a33f100 JH |
3999 | if (optab_libfunc (optab1, compute_mode) |
4000 | || optab_libfunc (optab2, compute_mode)) | |
44037a66 TG |
4001 | break; |
4002 | ||
535a42b1 NS |
4003 | /* If we still couldn't find a mode, use MODE, but expand_binop will |
4004 | probably die. */ | |
44037a66 TG |
4005 | if (compute_mode == VOIDmode) |
4006 | compute_mode = mode; | |
4007 | ||
55c2d311 TG |
4008 | if (target && GET_MODE (target) == compute_mode) |
4009 | tquotient = target; | |
4010 | else | |
4011 | tquotient = gen_reg_rtx (compute_mode); | |
2c414fba | 4012 | |
55c2d311 TG |
4013 | size = GET_MODE_BITSIZE (compute_mode); |
4014 | #if 0 | |
4015 | /* It should be possible to restrict the precision to GET_MODE_BITSIZE | |
71af73bb TG |
4016 | (mode), and thereby get better code when OP1 is a constant. Do that |
4017 | later. It will require going over all usages of SIZE below. */ | |
55c2d311 TG |
4018 | size = GET_MODE_BITSIZE (mode); |
4019 | #endif | |
bc1c7e93 | 4020 | |
9ec36da5 JL |
4021 | /* Only deduct something for a REM if the last divide done was |
4022 | for a different constant. Then set the constant of the last | |
4023 | divide. */ | |
5322d07e NF |
4024 | max_cost = (unsignedp |
4025 | ? udiv_cost (speed, compute_mode) | |
4026 | : sdiv_cost (speed, compute_mode)); | |
a28b2ac6 RS |
4027 | if (rem_flag && ! (last_div_const != 0 && op1_is_constant |
4028 | && INTVAL (op1) == last_div_const)) | |
5322d07e NF |
4029 | max_cost -= (mul_cost (speed, compute_mode) |
4030 | + add_cost (speed, compute_mode)); | |
9ec36da5 JL |
4031 | |
4032 | last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; | |
71af73bb | 4033 | |
55c2d311 | 4034 | /* Now convert to the best mode to use. */ |
44037a66 TG |
4035 | if (compute_mode != mode) |
4036 | { | |
55c2d311 | 4037 | op0 = convert_modes (compute_mode, mode, op0, unsignedp); |
81722fa9 | 4038 | op1 = convert_modes (compute_mode, mode, op1, unsignedp); |
e13a25d5 | 4039 | |
e9a25f70 JL |
4040 | /* convert_modes may have placed op1 into a register, so we |
4041 | must recompute the following. */ | |
481683e1 | 4042 | op1_is_constant = CONST_INT_P (op1); |
e13a25d5 DM |
4043 | op1_is_pow2 = (op1_is_constant |
4044 | && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) | |
4045 | || (! unsignedp | |
be63b77d | 4046 | && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1)))))); |
44037a66 TG |
4047 | } |
4048 | ||
55c2d311 | 4049 | /* If one of the operands is a volatile MEM, copy it into a register. */ |
c2a47e48 | 4050 | |
3c0cb5de | 4051 | if (MEM_P (op0) && MEM_VOLATILE_P (op0)) |
55c2d311 | 4052 | op0 = force_reg (compute_mode, op0); |
3c0cb5de | 4053 | if (MEM_P (op1) && MEM_VOLATILE_P (op1)) |
c2a47e48 RK |
4054 | op1 = force_reg (compute_mode, op1); |
4055 | ||
ab0b6581 TG |
4056 | /* If we need the remainder or if OP1 is constant, we need to |
4057 | put OP0 in a register in case it has any queued subexpressions. */ | |
4058 | if (rem_flag || op1_is_constant) | |
4059 | op0 = force_reg (compute_mode, op0); | |
bc1c7e93 | 4060 | |
55c2d311 | 4061 | last = get_last_insn (); |
44037a66 | 4062 | |
9faa82d8 | 4063 | /* Promote floor rounding to trunc rounding for unsigned operations. */ |
55c2d311 | 4064 | if (unsignedp) |
44037a66 | 4065 | { |
55c2d311 TG |
4066 | if (code == FLOOR_DIV_EXPR) |
4067 | code = TRUNC_DIV_EXPR; | |
4068 | if (code == FLOOR_MOD_EXPR) | |
4069 | code = TRUNC_MOD_EXPR; | |
db7cafb0 JL |
4070 | if (code == EXACT_DIV_EXPR && op1_is_pow2) |
4071 | code = TRUNC_DIV_EXPR; | |
55c2d311 | 4072 | } |
bc1c7e93 | 4073 | |
55c2d311 TG |
4074 | if (op1 != const0_rtx) |
4075 | switch (code) | |
4076 | { | |
4077 | case TRUNC_MOD_EXPR: | |
4078 | case TRUNC_DIV_EXPR: | |
34f016ed | 4079 | if (op1_is_constant) |
55c2d311 | 4080 | { |
d8f1376c | 4081 | if (unsignedp) |
55c2d311 | 4082 | { |
079c527f | 4083 | unsigned HOST_WIDE_INT mh, ml; |
55c2d311 TG |
4084 | int pre_shift, post_shift; |
4085 | int dummy; | |
1c4a429a JH |
4086 | unsigned HOST_WIDE_INT d = (INTVAL (op1) |
4087 | & GET_MODE_MASK (compute_mode)); | |
55c2d311 TG |
4088 | |
4089 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4090 | { | |
4091 | pre_shift = floor_log2 (d); | |
4092 | if (rem_flag) | |
4093 | { | |
2f1cd2eb RS |
4094 | unsigned HOST_WIDE_INT mask |
4095 | = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1; | |
db3cf6fb MS |
4096 | remainder |
4097 | = expand_binop (compute_mode, and_optab, op0, | |
2f1cd2eb | 4098 | gen_int_mode (mask, compute_mode), |
db3cf6fb MS |
4099 | remainder, 1, |
4100 | OPTAB_LIB_WIDEN); | |
55c2d311 | 4101 | if (remainder) |
c8dbc8ca | 4102 | return gen_lowpart (mode, remainder); |
55c2d311 TG |
4103 | } |
4104 | quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4105 | pre_shift, tquotient, 1); |
55c2d311 | 4106 | } |
34f016ed | 4107 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 | 4108 | { |
dc1d6150 | 4109 | if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1))) |
55c2d311 | 4110 | { |
dc1d6150 TG |
4111 | /* Most significant bit of divisor is set; emit an scc |
4112 | insn. */ | |
b45f0e58 PB |
4113 | quotient = emit_store_flag_force (tquotient, GEU, op0, op1, |
4114 | compute_mode, 1, 1); | |
55c2d311 TG |
4115 | } |
4116 | else | |
4117 | { | |
dc1d6150 TG |
4118 | /* Find a suitable multiplier and right shift count |
4119 | instead of multiplying with D. */ | |
4120 | ||
4121 | mh = choose_multiplier (d, size, size, | |
4122 | &ml, &post_shift, &dummy); | |
4123 | ||
4124 | /* If the suggested multiplier is more than SIZE bits, | |
4125 | we can do better for even divisors, using an | |
4126 | initial right shift. */ | |
4127 | if (mh != 0 && (d & 1) == 0) | |
4128 | { | |
4129 | pre_shift = floor_log2 (d & -d); | |
4130 | mh = choose_multiplier (d >> pre_shift, size, | |
4131 | size - pre_shift, | |
4132 | &ml, &post_shift, &dummy); | |
5b0264cb | 4133 | gcc_assert (!mh); |
dc1d6150 TG |
4134 | } |
4135 | else | |
4136 | pre_shift = 0; | |
4137 | ||
4138 | if (mh != 0) | |
4139 | { | |
4140 | rtx t1, t2, t3, t4; | |
4141 | ||
02a65aef R |
4142 | if (post_shift - 1 >= BITS_PER_WORD) |
4143 | goto fail1; | |
4144 | ||
965703ed | 4145 | extra_cost |
5322d07e NF |
4146 | = (shift_cost (speed, compute_mode, post_shift - 1) |
4147 | + shift_cost (speed, compute_mode, 1) | |
4148 | + 2 * add_cost (speed, compute_mode)); | |
2f1cd2eb RS |
4149 | t1 = expmed_mult_highpart |
4150 | (compute_mode, op0, | |
4151 | gen_int_mode (ml, compute_mode), | |
4152 | NULL_RTX, 1, max_cost - extra_cost); | |
dc1d6150 TG |
4153 | if (t1 == 0) |
4154 | goto fail1; | |
38a448ca RH |
4155 | t2 = force_operand (gen_rtx_MINUS (compute_mode, |
4156 | op0, t1), | |
dc1d6150 | 4157 | NULL_RTX); |
eb6c3df1 RG |
4158 | t3 = expand_shift (RSHIFT_EXPR, compute_mode, |
4159 | t2, 1, NULL_RTX, 1); | |
38a448ca RH |
4160 | t4 = force_operand (gen_rtx_PLUS (compute_mode, |
4161 | t1, t3), | |
dc1d6150 | 4162 | NULL_RTX); |
4a90aeeb NS |
4163 | quotient = expand_shift |
4164 | (RSHIFT_EXPR, compute_mode, t4, | |
eb6c3df1 | 4165 | post_shift - 1, tquotient, 1); |
dc1d6150 TG |
4166 | } |
4167 | else | |
4168 | { | |
4169 | rtx t1, t2; | |
4170 | ||
02a65aef R |
4171 | if (pre_shift >= BITS_PER_WORD |
4172 | || post_shift >= BITS_PER_WORD) | |
4173 | goto fail1; | |
4174 | ||
4a90aeeb NS |
4175 | t1 = expand_shift |
4176 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4177 | pre_shift, NULL_RTX, 1); |
965703ed | 4178 | extra_cost |
5322d07e NF |
4179 | = (shift_cost (speed, compute_mode, pre_shift) |
4180 | + shift_cost (speed, compute_mode, post_shift)); | |
2f1cd2eb RS |
4181 | t2 = expmed_mult_highpart |
4182 | (compute_mode, t1, | |
4183 | gen_int_mode (ml, compute_mode), | |
4184 | NULL_RTX, 1, max_cost - extra_cost); | |
dc1d6150 TG |
4185 | if (t2 == 0) |
4186 | goto fail1; | |
4a90aeeb NS |
4187 | quotient = expand_shift |
4188 | (RSHIFT_EXPR, compute_mode, t2, | |
eb6c3df1 | 4189 | post_shift, tquotient, 1); |
dc1d6150 | 4190 | } |
55c2d311 TG |
4191 | } |
4192 | } | |
34f016ed TG |
4193 | else /* Too wide mode to use tricky code */ |
4194 | break; | |
55c2d311 TG |
4195 | |
4196 | insn = get_last_insn (); | |
7543f918 JR |
4197 | if (insn != last) |
4198 | set_dst_reg_note (insn, REG_EQUAL, | |
4199 | gen_rtx_UDIV (compute_mode, op0, op1), | |
4200 | quotient); | |
55c2d311 TG |
4201 | } |
4202 | else /* TRUNC_DIV, signed */ | |
4203 | { | |
4204 | unsigned HOST_WIDE_INT ml; | |
4205 | int lgup, post_shift; | |
e71c0aa7 | 4206 | rtx mlr; |
55c2d311 | 4207 | HOST_WIDE_INT d = INTVAL (op1); |
e4c9f3c2 ILT |
4208 | unsigned HOST_WIDE_INT abs_d; |
4209 | ||
093253be ILT |
4210 | /* Since d might be INT_MIN, we have to cast to |
4211 | unsigned HOST_WIDE_INT before negating to avoid | |
4212 | undefined signed overflow. */ | |
6d9c91e9 ILT |
4213 | abs_d = (d >= 0 |
4214 | ? (unsigned HOST_WIDE_INT) d | |
4215 | : - (unsigned HOST_WIDE_INT) d); | |
55c2d311 TG |
4216 | |
4217 | /* n rem d = n rem -d */ | |
4218 | if (rem_flag && d < 0) | |
4219 | { | |
4220 | d = abs_d; | |
2496c7bd | 4221 | op1 = gen_int_mode (abs_d, compute_mode); |
55c2d311 TG |
4222 | } |
4223 | ||
4224 | if (d == 1) | |
4225 | quotient = op0; | |
4226 | else if (d == -1) | |
4227 | quotient = expand_unop (compute_mode, neg_optab, op0, | |
4228 | tquotient, 0); | |
f6c1336c ILT |
4229 | else if (HOST_BITS_PER_WIDE_INT >= size |
4230 | && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1)) | |
f737b132 RK |
4231 | { |
4232 | /* This case is not handled correctly below. */ | |
4233 | quotient = emit_store_flag (tquotient, EQ, op0, op1, | |
4234 | compute_mode, 1, 1); | |
4235 | if (quotient == 0) | |
4236 | goto fail1; | |
4237 | } | |
55c2d311 | 4238 | else if (EXACT_POWER_OF_2_OR_ZERO_P (d) |
5322d07e NF |
4239 | && (rem_flag |
4240 | ? smod_pow2_cheap (speed, compute_mode) | |
4241 | : sdiv_pow2_cheap (speed, compute_mode)) | |
0b55e932 RS |
4242 | /* We assume that cheap metric is true if the |
4243 | optab has an expander for this mode. */ | |
166cdb08 JH |
4244 | && ((optab_handler ((rem_flag ? smod_optab |
4245 | : sdiv_optab), | |
947131ba | 4246 | compute_mode) |
a8c7e72d | 4247 | != CODE_FOR_nothing) |
947131ba RS |
4248 | || (optab_handler (sdivmod_optab, |
4249 | compute_mode) | |
4250 | != CODE_FOR_nothing))) | |
55c2d311 TG |
4251 | ; |
4252 | else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) | |
4253 | { | |
0b55e932 RS |
4254 | if (rem_flag) |
4255 | { | |
4256 | remainder = expand_smod_pow2 (compute_mode, op0, d); | |
4257 | if (remainder) | |
4258 | return gen_lowpart (mode, remainder); | |
4259 | } | |
3d520aaf | 4260 | |
5322d07e | 4261 | if (sdiv_pow2_cheap (speed, compute_mode) |
947131ba | 4262 | && ((optab_handler (sdiv_optab, compute_mode) |
3d520aaf | 4263 | != CODE_FOR_nothing) |
947131ba | 4264 | || (optab_handler (sdivmod_optab, compute_mode) |
3d520aaf DE |
4265 | != CODE_FOR_nothing))) |
4266 | quotient = expand_divmod (0, TRUNC_DIV_EXPR, | |
4267 | compute_mode, op0, | |
4268 | gen_int_mode (abs_d, | |
4269 | compute_mode), | |
4270 | NULL_RTX, 0); | |
4271 | else | |
4272 | quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d); | |
55c2d311 | 4273 | |
0b55e932 RS |
4274 | /* We have computed OP0 / abs(OP1). If OP1 is negative, |
4275 | negate the quotient. */ | |
55c2d311 TG |
4276 | if (d < 0) |
4277 | { | |
4278 | insn = get_last_insn (); | |
4e430df8 | 4279 | if (insn != last |
c8e7fe58 DE |
4280 | && abs_d < ((unsigned HOST_WIDE_INT) 1 |
4281 | << (HOST_BITS_PER_WIDE_INT - 1))) | |
7543f918 JR |
4282 | set_dst_reg_note (insn, REG_EQUAL, |
4283 | gen_rtx_DIV (compute_mode, op0, | |
6d26322f JR |
4284 | gen_int_mode |
4285 | (abs_d, | |
4286 | compute_mode)), | |
7543f918 | 4287 | quotient); |
55c2d311 TG |
4288 | |
4289 | quotient = expand_unop (compute_mode, neg_optab, | |
4290 | quotient, quotient, 0); | |
4291 | } | |
4292 | } | |
34f016ed | 4293 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 TG |
4294 | { |
4295 | choose_multiplier (abs_d, size, size - 1, | |
079c527f | 4296 | &ml, &post_shift, &lgup); |
55c2d311 TG |
4297 | if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1)) |
4298 | { | |
4299 | rtx t1, t2, t3; | |
4300 | ||
02a65aef R |
4301 | if (post_shift >= BITS_PER_WORD |
4302 | || size - 1 >= BITS_PER_WORD) | |
4303 | goto fail1; | |
4304 | ||
5322d07e NF |
4305 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4306 | + shift_cost (speed, compute_mode, size - 1) | |
4307 | + add_cost (speed, compute_mode)); | |
2f1cd2eb RS |
4308 | t1 = expmed_mult_highpart |
4309 | (compute_mode, op0, gen_int_mode (ml, compute_mode), | |
4310 | NULL_RTX, 0, max_cost - extra_cost); | |
55c2d311 TG |
4311 | if (t1 == 0) |
4312 | goto fail1; | |
4a90aeeb NS |
4313 | t2 = expand_shift |
4314 | (RSHIFT_EXPR, compute_mode, t1, | |
eb6c3df1 | 4315 | post_shift, NULL_RTX, 0); |
4a90aeeb NS |
4316 | t3 = expand_shift |
4317 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4318 | size - 1, NULL_RTX, 0); |
55c2d311 | 4319 | if (d < 0) |
c5c76735 JL |
4320 | quotient |
4321 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4322 | t3, t2), | |
4323 | tquotient); | |
55c2d311 | 4324 | else |
c5c76735 JL |
4325 | quotient |
4326 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4327 | t2, t3), | |
4328 | tquotient); | |
55c2d311 TG |
4329 | } |
4330 | else | |
4331 | { | |
4332 | rtx t1, t2, t3, t4; | |
4333 | ||
02a65aef R |
4334 | if (post_shift >= BITS_PER_WORD |
4335 | || size - 1 >= BITS_PER_WORD) | |
4336 | goto fail1; | |
4337 | ||
55c2d311 | 4338 | ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); |
e71c0aa7 | 4339 | mlr = gen_int_mode (ml, compute_mode); |
5322d07e NF |
4340 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4341 | + shift_cost (speed, compute_mode, size - 1) | |
4342 | + 2 * add_cost (speed, compute_mode)); | |
00f07b86 | 4343 | t1 = expmed_mult_highpart (compute_mode, op0, mlr, |
71af73bb TG |
4344 | NULL_RTX, 0, |
4345 | max_cost - extra_cost); | |
55c2d311 TG |
4346 | if (t1 == 0) |
4347 | goto fail1; | |
c5c76735 JL |
4348 | t2 = force_operand (gen_rtx_PLUS (compute_mode, |
4349 | t1, op0), | |
55c2d311 | 4350 | NULL_RTX); |
4a90aeeb NS |
4351 | t3 = expand_shift |
4352 | (RSHIFT_EXPR, compute_mode, t2, | |
eb6c3df1 | 4353 | post_shift, NULL_RTX, 0); |
4a90aeeb NS |
4354 | t4 = expand_shift |
4355 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4356 | size - 1, NULL_RTX, 0); |
55c2d311 | 4357 | if (d < 0) |
c5c76735 JL |
4358 | quotient |
4359 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4360 | t4, t3), | |
4361 | tquotient); | |
55c2d311 | 4362 | else |
c5c76735 JL |
4363 | quotient |
4364 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4365 | t3, t4), | |
4366 | tquotient); | |
55c2d311 TG |
4367 | } |
4368 | } | |
34f016ed TG |
4369 | else /* Too wide mode to use tricky code */ |
4370 | break; | |
55c2d311 | 4371 | |
4e430df8 | 4372 | insn = get_last_insn (); |
7543f918 JR |
4373 | if (insn != last) |
4374 | set_dst_reg_note (insn, REG_EQUAL, | |
4375 | gen_rtx_DIV (compute_mode, op0, op1), | |
4376 | quotient); | |
55c2d311 TG |
4377 | } |
4378 | break; | |
4379 | } | |
4380 | fail1: | |
4381 | delete_insns_since (last); | |
4382 | break; | |
44037a66 | 4383 | |
55c2d311 TG |
4384 | case FLOOR_DIV_EXPR: |
4385 | case FLOOR_MOD_EXPR: | |
4386 | /* We will come here only for signed operations. */ | |
4387 | if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) | |
4388 | { | |
079c527f | 4389 | unsigned HOST_WIDE_INT mh, ml; |
55c2d311 TG |
4390 | int pre_shift, lgup, post_shift; |
4391 | HOST_WIDE_INT d = INTVAL (op1); | |
4392 | ||
4393 | if (d > 0) | |
4394 | { | |
4395 | /* We could just as easily deal with negative constants here, | |
4396 | but it does not seem worth the trouble for GCC 2.6. */ | |
4397 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4398 | { | |
4399 | pre_shift = floor_log2 (d); | |
4400 | if (rem_flag) | |
4401 | { | |
2f1cd2eb RS |
4402 | unsigned HOST_WIDE_INT mask |
4403 | = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1; | |
4404 | remainder = expand_binop | |
4405 | (compute_mode, and_optab, op0, | |
4406 | gen_int_mode (mask, compute_mode), | |
4407 | remainder, 0, OPTAB_LIB_WIDEN); | |
55c2d311 | 4408 | if (remainder) |
c8dbc8ca | 4409 | return gen_lowpart (mode, remainder); |
55c2d311 | 4410 | } |
4a90aeeb NS |
4411 | quotient = expand_shift |
4412 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4413 | pre_shift, tquotient, 0); |
55c2d311 TG |
4414 | } |
4415 | else | |
4416 | { | |
4417 | rtx t1, t2, t3, t4; | |
4418 | ||
4419 | mh = choose_multiplier (d, size, size - 1, | |
4420 | &ml, &post_shift, &lgup); | |
5b0264cb | 4421 | gcc_assert (!mh); |
55c2d311 | 4422 | |
02a65aef R |
4423 | if (post_shift < BITS_PER_WORD |
4424 | && size - 1 < BITS_PER_WORD) | |
55c2d311 | 4425 | { |
4a90aeeb NS |
4426 | t1 = expand_shift |
4427 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4428 | size - 1, NULL_RTX, 0); |
02a65aef R |
4429 | t2 = expand_binop (compute_mode, xor_optab, op0, t1, |
4430 | NULL_RTX, 0, OPTAB_WIDEN); | |
5322d07e NF |
4431 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4432 | + shift_cost (speed, compute_mode, size - 1) | |
4433 | + 2 * add_cost (speed, compute_mode)); | |
2f1cd2eb RS |
4434 | t3 = expmed_mult_highpart |
4435 | (compute_mode, t2, gen_int_mode (ml, compute_mode), | |
4436 | NULL_RTX, 1, max_cost - extra_cost); | |
02a65aef R |
4437 | if (t3 != 0) |
4438 | { | |
4a90aeeb NS |
4439 | t4 = expand_shift |
4440 | (RSHIFT_EXPR, compute_mode, t3, | |
eb6c3df1 | 4441 | post_shift, NULL_RTX, 1); |
02a65aef R |
4442 | quotient = expand_binop (compute_mode, xor_optab, |
4443 | t4, t1, tquotient, 0, | |
4444 | OPTAB_WIDEN); | |
4445 | } | |
55c2d311 TG |
4446 | } |
4447 | } | |
4448 | } | |
4449 | else | |
4450 | { | |
4451 | rtx nsign, t1, t2, t3, t4; | |
38a448ca RH |
4452 | t1 = force_operand (gen_rtx_PLUS (compute_mode, |
4453 | op0, constm1_rtx), NULL_RTX); | |
55c2d311 TG |
4454 | t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX, |
4455 | 0, OPTAB_WIDEN); | |
4a90aeeb NS |
4456 | nsign = expand_shift |
4457 | (RSHIFT_EXPR, compute_mode, t2, | |
eb6c3df1 | 4458 | size - 1, NULL_RTX, 0); |
38a448ca | 4459 | t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign), |
55c2d311 TG |
4460 | NULL_RTX); |
4461 | t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1, | |
4462 | NULL_RTX, 0); | |
4463 | if (t4) | |
4464 | { | |
4465 | rtx t5; | |
4466 | t5 = expand_unop (compute_mode, one_cmpl_optab, nsign, | |
4467 | NULL_RTX, 0); | |
38a448ca RH |
4468 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4469 | t4, t5), | |
55c2d311 TG |
4470 | tquotient); |
4471 | } | |
4472 | } | |
4473 | } | |
4474 | ||
4475 | if (quotient != 0) | |
4476 | break; | |
4477 | delete_insns_since (last); | |
4478 | ||
4479 | /* Try using an instruction that produces both the quotient and | |
4480 | remainder, using truncation. We can easily compensate the quotient | |
4481 | or remainder to get floor rounding, once we have the remainder. | |
4482 | Notice that we compute also the final remainder value here, | |
4483 | and return the result right away. */ | |
a45cf58c | 4484 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4485 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4486 | |
55c2d311 TG |
4487 | if (rem_flag) |
4488 | { | |
668443c9 | 4489 | remainder |
f8cfc6aa | 4490 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4491 | quotient = gen_reg_rtx (compute_mode); |
4492 | } | |
4493 | else | |
4494 | { | |
668443c9 | 4495 | quotient |
f8cfc6aa | 4496 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4497 | remainder = gen_reg_rtx (compute_mode); |
4498 | } | |
4499 | ||
4500 | if (expand_twoval_binop (sdivmod_optab, op0, op1, | |
4501 | quotient, remainder, 0)) | |
4502 | { | |
4503 | /* This could be computed with a branch-less sequence. | |
4504 | Save that for later. */ | |
4505 | rtx tem; | |
f3f6fb16 | 4506 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 | 4507 | do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); |
55c2d311 TG |
4508 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4509 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4510 | do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); |
55c2d311 TG |
4511 | expand_dec (quotient, const1_rtx); |
4512 | expand_inc (remainder, op1); | |
4513 | emit_label (label); | |
c8dbc8ca | 4514 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4515 | } |
4516 | ||
4517 | /* No luck with division elimination or divmod. Have to do it | |
4518 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4519 | { |
f3f6fb16 | 4520 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
55c2d311 TG |
4521 | rtx adjusted_op0; |
4522 | rtx tem; | |
4523 | ||
4524 | quotient = gen_reg_rtx (compute_mode); | |
4525 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4526 | label1 = gen_label_rtx (); | |
4527 | label2 = gen_label_rtx (); | |
4528 | label3 = gen_label_rtx (); | |
4529 | label4 = gen_label_rtx (); | |
4530 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4531 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4532 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); | |
55c2d311 TG |
4533 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4534 | quotient, 0, OPTAB_LIB_WIDEN); | |
4535 | if (tem != quotient) | |
4536 | emit_move_insn (quotient, tem); | |
ec4a505f | 4537 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
4538 | emit_barrier (); |
4539 | emit_label (label1); | |
44037a66 | 4540 | expand_inc (adjusted_op0, const1_rtx); |
ec4a505f | 4541 | emit_jump_insn (targetm.gen_jump (label4)); |
55c2d311 TG |
4542 | emit_barrier (); |
4543 | emit_label (label2); | |
f5963e61 | 4544 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); |
55c2d311 TG |
4545 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4546 | quotient, 0, OPTAB_LIB_WIDEN); | |
4547 | if (tem != quotient) | |
4548 | emit_move_insn (quotient, tem); | |
ec4a505f | 4549 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
4550 | emit_barrier (); |
4551 | emit_label (label3); | |
4552 | expand_dec (adjusted_op0, const1_rtx); | |
4553 | emit_label (label4); | |
4554 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4555 | quotient, 0, OPTAB_LIB_WIDEN); | |
4556 | if (tem != quotient) | |
4557 | emit_move_insn (quotient, tem); | |
4558 | expand_dec (quotient, const1_rtx); | |
4559 | emit_label (label5); | |
44037a66 | 4560 | } |
55c2d311 | 4561 | break; |
44037a66 | 4562 | |
55c2d311 TG |
4563 | case CEIL_DIV_EXPR: |
4564 | case CEIL_MOD_EXPR: | |
4565 | if (unsignedp) | |
4566 | { | |
9176af2f TG |
4567 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))) |
4568 | { | |
4569 | rtx t1, t2, t3; | |
4570 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4571 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4572 | floor_log2 (d), tquotient, 1); |
9176af2f | 4573 | t2 = expand_binop (compute_mode, and_optab, op0, |
2f1cd2eb | 4574 | gen_int_mode (d - 1, compute_mode), |
9176af2f TG |
4575 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4576 | t3 = gen_reg_rtx (compute_mode); | |
4577 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4578 | compute_mode, 1, 1); | |
412381d9 TG |
4579 | if (t3 == 0) |
4580 | { | |
f3f6fb16 | 4581 | rtx_code_label *lab; |
412381d9 | 4582 | lab = gen_label_rtx (); |
f5963e61 | 4583 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
412381d9 TG |
4584 | expand_inc (t1, const1_rtx); |
4585 | emit_label (lab); | |
4586 | quotient = t1; | |
4587 | } | |
4588 | else | |
38a448ca RH |
4589 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4590 | t1, t3), | |
412381d9 | 4591 | tquotient); |
9176af2f TG |
4592 | break; |
4593 | } | |
55c2d311 TG |
4594 | |
4595 | /* Try using an instruction that produces both the quotient and | |
4596 | remainder, using truncation. We can easily compensate the | |
4597 | quotient or remainder to get ceiling rounding, once we have the | |
4598 | remainder. Notice that we compute also the final remainder | |
4599 | value here, and return the result right away. */ | |
a45cf58c | 4600 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4601 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4602 | |
55c2d311 TG |
4603 | if (rem_flag) |
4604 | { | |
f8cfc6aa | 4605 | remainder = (REG_P (target) |
668443c9 | 4606 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4607 | quotient = gen_reg_rtx (compute_mode); |
4608 | } | |
4609 | else | |
4610 | { | |
f8cfc6aa | 4611 | quotient = (REG_P (target) |
668443c9 | 4612 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4613 | remainder = gen_reg_rtx (compute_mode); |
4614 | } | |
4615 | ||
4616 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, | |
4617 | remainder, 1)) | |
4618 | { | |
4619 | /* This could be computed with a branch-less sequence. | |
4620 | Save that for later. */ | |
f3f6fb16 | 4621 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 JL |
4622 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4623 | compute_mode, label); | |
55c2d311 TG |
4624 | expand_inc (quotient, const1_rtx); |
4625 | expand_dec (remainder, op1); | |
4626 | emit_label (label); | |
c8dbc8ca | 4627 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4628 | } |
4629 | ||
4630 | /* No luck with division elimination or divmod. Have to do it | |
4631 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4632 | { |
f3f6fb16 | 4633 | rtx_code_label *label1, *label2; |
55c2d311 TG |
4634 | rtx adjusted_op0, tem; |
4635 | ||
4636 | quotient = gen_reg_rtx (compute_mode); | |
4637 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4638 | label1 = gen_label_rtx (); | |
4639 | label2 = gen_label_rtx (); | |
f5963e61 JL |
4640 | do_cmp_and_jump (adjusted_op0, const0_rtx, NE, |
4641 | compute_mode, label1); | |
55c2d311 | 4642 | emit_move_insn (quotient, const0_rtx); |
ec4a505f | 4643 | emit_jump_insn (targetm.gen_jump (label2)); |
55c2d311 TG |
4644 | emit_barrier (); |
4645 | emit_label (label1); | |
4646 | expand_dec (adjusted_op0, const1_rtx); | |
4647 | tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, | |
4648 | quotient, 1, OPTAB_LIB_WIDEN); | |
4649 | if (tem != quotient) | |
4650 | emit_move_insn (quotient, tem); | |
4651 | expand_inc (quotient, const1_rtx); | |
4652 | emit_label (label2); | |
44037a66 | 4653 | } |
55c2d311 TG |
4654 | } |
4655 | else /* signed */ | |
4656 | { | |
73f27728 RK |
4657 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
4658 | && INTVAL (op1) >= 0) | |
4659 | { | |
4660 | /* This is extremely similar to the code for the unsigned case | |
4661 | above. For 2.7 we should merge these variants, but for | |
4662 | 2.6.1 I don't want to touch the code for unsigned since that | |
4663 | get used in C. The signed case will only be used by other | |
4664 | languages (Ada). */ | |
4665 | ||
4666 | rtx t1, t2, t3; | |
4667 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4668 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4669 | floor_log2 (d), tquotient, 0); |
73f27728 | 4670 | t2 = expand_binop (compute_mode, and_optab, op0, |
2f1cd2eb | 4671 | gen_int_mode (d - 1, compute_mode), |
73f27728 RK |
4672 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4673 | t3 = gen_reg_rtx (compute_mode); | |
4674 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4675 | compute_mode, 1, 1); | |
4676 | if (t3 == 0) | |
4677 | { | |
f3f6fb16 | 4678 | rtx_code_label *lab; |
73f27728 | 4679 | lab = gen_label_rtx (); |
f5963e61 | 4680 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
73f27728 RK |
4681 | expand_inc (t1, const1_rtx); |
4682 | emit_label (lab); | |
4683 | quotient = t1; | |
4684 | } | |
4685 | else | |
38a448ca RH |
4686 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4687 | t1, t3), | |
73f27728 RK |
4688 | tquotient); |
4689 | break; | |
4690 | } | |
4691 | ||
55c2d311 TG |
4692 | /* Try using an instruction that produces both the quotient and |
4693 | remainder, using truncation. We can easily compensate the | |
4694 | quotient or remainder to get ceiling rounding, once we have the | |
4695 | remainder. Notice that we compute also the final remainder | |
4696 | value here, and return the result right away. */ | |
a45cf58c | 4697 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 TG |
4698 | target = gen_reg_rtx (compute_mode); |
4699 | if (rem_flag) | |
4700 | { | |
f8cfc6aa | 4701 | remainder= (REG_P (target) |
668443c9 | 4702 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4703 | quotient = gen_reg_rtx (compute_mode); |
4704 | } | |
4705 | else | |
4706 | { | |
f8cfc6aa | 4707 | quotient = (REG_P (target) |
668443c9 | 4708 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4709 | remainder = gen_reg_rtx (compute_mode); |
4710 | } | |
4711 | ||
4712 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, | |
4713 | remainder, 0)) | |
4714 | { | |
4715 | /* This could be computed with a branch-less sequence. | |
4716 | Save that for later. */ | |
4717 | rtx tem; | |
f3f6fb16 | 4718 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 JL |
4719 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4720 | compute_mode, label); | |
55c2d311 TG |
4721 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4722 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4723 | do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); |
55c2d311 TG |
4724 | expand_inc (quotient, const1_rtx); |
4725 | expand_dec (remainder, op1); | |
4726 | emit_label (label); | |
c8dbc8ca | 4727 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4728 | } |
4729 | ||
4730 | /* No luck with division elimination or divmod. Have to do it | |
4731 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4732 | { |
f3f6fb16 | 4733 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
55c2d311 TG |
4734 | rtx adjusted_op0; |
4735 | rtx tem; | |
4736 | ||
4737 | quotient = gen_reg_rtx (compute_mode); | |
4738 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4739 | label1 = gen_label_rtx (); | |
4740 | label2 = gen_label_rtx (); | |
4741 | label3 = gen_label_rtx (); | |
4742 | label4 = gen_label_rtx (); | |
4743 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4744 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4745 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, | |
4746 | compute_mode, label1); | |
55c2d311 TG |
4747 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4748 | quotient, 0, OPTAB_LIB_WIDEN); | |
4749 | if (tem != quotient) | |
4750 | emit_move_insn (quotient, tem); | |
ec4a505f | 4751 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
4752 | emit_barrier (); |
4753 | emit_label (label1); | |
4754 | expand_dec (adjusted_op0, const1_rtx); | |
ec4a505f | 4755 | emit_jump_insn (targetm.gen_jump (label4)); |
55c2d311 TG |
4756 | emit_barrier (); |
4757 | emit_label (label2); | |
f5963e61 JL |
4758 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, |
4759 | compute_mode, label3); | |
55c2d311 TG |
4760 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4761 | quotient, 0, OPTAB_LIB_WIDEN); | |
4762 | if (tem != quotient) | |
4763 | emit_move_insn (quotient, tem); | |
ec4a505f | 4764 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
4765 | emit_barrier (); |
4766 | emit_label (label3); | |
4767 | expand_inc (adjusted_op0, const1_rtx); | |
4768 | emit_label (label4); | |
4769 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4770 | quotient, 0, OPTAB_LIB_WIDEN); | |
4771 | if (tem != quotient) | |
4772 | emit_move_insn (quotient, tem); | |
4773 | expand_inc (quotient, const1_rtx); | |
4774 | emit_label (label5); | |
44037a66 | 4775 | } |
55c2d311 TG |
4776 | } |
4777 | break; | |
bc1c7e93 | 4778 | |
55c2d311 TG |
4779 | case EXACT_DIV_EXPR: |
4780 | if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) | |
4781 | { | |
4782 | HOST_WIDE_INT d = INTVAL (op1); | |
4783 | unsigned HOST_WIDE_INT ml; | |
91ce572a | 4784 | int pre_shift; |
55c2d311 TG |
4785 | rtx t1; |
4786 | ||
91ce572a CC |
4787 | pre_shift = floor_log2 (d & -d); |
4788 | ml = invert_mod2n (d >> pre_shift, size); | |
4789 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4790 | pre_shift, NULL_RTX, unsignedp); |
69107307 | 4791 | quotient = expand_mult (compute_mode, t1, |
2496c7bd | 4792 | gen_int_mode (ml, compute_mode), |
31ff3e0b | 4793 | NULL_RTX, 1); |
55c2d311 TG |
4794 | |
4795 | insn = get_last_insn (); | |
7543f918 JR |
4796 | set_dst_reg_note (insn, REG_EQUAL, |
4797 | gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, | |
4798 | compute_mode, op0, op1), | |
4799 | quotient); | |
55c2d311 TG |
4800 | } |
4801 | break; | |
4802 | ||
4803 | case ROUND_DIV_EXPR: | |
4804 | case ROUND_MOD_EXPR: | |
69f61901 RK |
4805 | if (unsignedp) |
4806 | { | |
4807 | rtx tem; | |
f3f6fb16 | 4808 | rtx_code_label *label; |
69f61901 RK |
4809 | label = gen_label_rtx (); |
4810 | quotient = gen_reg_rtx (compute_mode); | |
4811 | remainder = gen_reg_rtx (compute_mode); | |
4812 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) | |
4813 | { | |
4814 | rtx tem; | |
4815 | quotient = expand_binop (compute_mode, udiv_optab, op0, op1, | |
4816 | quotient, 1, OPTAB_LIB_WIDEN); | |
4817 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1); | |
4818 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4819 | remainder, 1, OPTAB_LIB_WIDEN); | |
4820 | } | |
0a81f074 | 4821 | tem = plus_constant (compute_mode, op1, -1); |
eb6c3df1 | 4822 | tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1); |
f5963e61 | 4823 | do_cmp_and_jump (remainder, tem, LEU, compute_mode, label); |
69f61901 RK |
4824 | expand_inc (quotient, const1_rtx); |
4825 | expand_dec (remainder, op1); | |
4826 | emit_label (label); | |
4827 | } | |
4828 | else | |
4829 | { | |
4830 | rtx abs_rem, abs_op1, tem, mask; | |
f3f6fb16 | 4831 | rtx_code_label *label; |
69f61901 RK |
4832 | label = gen_label_rtx (); |
4833 | quotient = gen_reg_rtx (compute_mode); | |
4834 | remainder = gen_reg_rtx (compute_mode); | |
4835 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) | |
4836 | { | |
4837 | rtx tem; | |
4838 | quotient = expand_binop (compute_mode, sdiv_optab, op0, op1, | |
4839 | quotient, 0, OPTAB_LIB_WIDEN); | |
4840 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0); | |
4841 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4842 | remainder, 0, OPTAB_LIB_WIDEN); | |
4843 | } | |
91ce572a CC |
4844 | abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0); |
4845 | abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0); | |
69f61901 | 4846 | tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem, |
eb6c3df1 | 4847 | 1, NULL_RTX, 1); |
f5963e61 | 4848 | do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label); |
69f61901 RK |
4849 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4850 | NULL_RTX, 0, OPTAB_WIDEN); | |
4851 | mask = expand_shift (RSHIFT_EXPR, compute_mode, tem, | |
eb6c3df1 | 4852 | size - 1, NULL_RTX, 0); |
69f61901 RK |
4853 | tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx, |
4854 | NULL_RTX, 0, OPTAB_WIDEN); | |
4855 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
4856 | NULL_RTX, 0, OPTAB_WIDEN); | |
4857 | expand_inc (quotient, tem); | |
4858 | tem = expand_binop (compute_mode, xor_optab, mask, op1, | |
4859 | NULL_RTX, 0, OPTAB_WIDEN); | |
4860 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
4861 | NULL_RTX, 0, OPTAB_WIDEN); | |
4862 | expand_dec (remainder, tem); | |
4863 | emit_label (label); | |
4864 | } | |
4865 | return gen_lowpart (mode, rem_flag ? remainder : quotient); | |
c410d49e | 4866 | |
e9a25f70 | 4867 | default: |
5b0264cb | 4868 | gcc_unreachable (); |
55c2d311 | 4869 | } |
44037a66 | 4870 | |
55c2d311 | 4871 | if (quotient == 0) |
44037a66 | 4872 | { |
a45cf58c RK |
4873 | if (target && GET_MODE (target) != compute_mode) |
4874 | target = 0; | |
4875 | ||
55c2d311 | 4876 | if (rem_flag) |
44037a66 | 4877 | { |
32fdf36b | 4878 | /* Try to produce the remainder without producing the quotient. |
d6a7951f | 4879 | If we seem to have a divmod pattern that does not require widening, |
b20b352b | 4880 | don't try widening here. We should really have a WIDEN argument |
32fdf36b TG |
4881 | to expand_twoval_binop, since what we'd really like to do here is |
4882 | 1) try a mod insn in compute_mode | |
4883 | 2) try a divmod insn in compute_mode | |
4884 | 3) try a div insn in compute_mode and multiply-subtract to get | |
4885 | remainder | |
4886 | 4) try the same things with widening allowed. */ | |
4887 | remainder | |
4888 | = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
4889 | op0, op1, target, | |
4890 | unsignedp, | |
947131ba | 4891 | ((optab_handler (optab2, compute_mode) |
32fdf36b TG |
4892 | != CODE_FOR_nothing) |
4893 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
55c2d311 | 4894 | if (remainder == 0) |
44037a66 TG |
4895 | { |
4896 | /* No luck there. Can we do remainder and divide at once | |
4897 | without a library call? */ | |
55c2d311 TG |
4898 | remainder = gen_reg_rtx (compute_mode); |
4899 | if (! expand_twoval_binop ((unsignedp | |
4900 | ? udivmod_optab | |
4901 | : sdivmod_optab), | |
4902 | op0, op1, | |
4903 | NULL_RTX, remainder, unsignedp)) | |
4904 | remainder = 0; | |
44037a66 | 4905 | } |
55c2d311 TG |
4906 | |
4907 | if (remainder) | |
4908 | return gen_lowpart (mode, remainder); | |
44037a66 | 4909 | } |
44037a66 | 4910 | |
dc38b292 RK |
4911 | /* Produce the quotient. Try a quotient insn, but not a library call. |
4912 | If we have a divmod in this mode, use it in preference to widening | |
4913 | the div (for this test we assume it will not fail). Note that optab2 | |
4914 | is set to the one of the two optabs that the call below will use. */ | |
4915 | quotient | |
4916 | = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, | |
4917 | op0, op1, rem_flag ? NULL_RTX : target, | |
4918 | unsignedp, | |
947131ba | 4919 | ((optab_handler (optab2, compute_mode) |
dc38b292 RK |
4920 | != CODE_FOR_nothing) |
4921 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
4922 | ||
55c2d311 | 4923 | if (quotient == 0) |
44037a66 TG |
4924 | { |
4925 | /* No luck there. Try a quotient-and-remainder insn, | |
4926 | keeping the quotient alone. */ | |
55c2d311 | 4927 | quotient = gen_reg_rtx (compute_mode); |
44037a66 | 4928 | if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, |
55c2d311 TG |
4929 | op0, op1, |
4930 | quotient, NULL_RTX, unsignedp)) | |
4931 | { | |
4932 | quotient = 0; | |
4933 | if (! rem_flag) | |
4934 | /* Still no luck. If we are not computing the remainder, | |
4935 | use a library call for the quotient. */ | |
4936 | quotient = sign_expand_binop (compute_mode, | |
4937 | udiv_optab, sdiv_optab, | |
4938 | op0, op1, target, | |
4939 | unsignedp, OPTAB_LIB_WIDEN); | |
4940 | } | |
44037a66 | 4941 | } |
44037a66 TG |
4942 | } |
4943 | ||
44037a66 TG |
4944 | if (rem_flag) |
4945 | { | |
a45cf58c RK |
4946 | if (target && GET_MODE (target) != compute_mode) |
4947 | target = 0; | |
4948 | ||
55c2d311 | 4949 | if (quotient == 0) |
b3f8d95d MM |
4950 | { |
4951 | /* No divide instruction either. Use library for remainder. */ | |
4952 | remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
4953 | op0, op1, target, | |
4954 | unsignedp, OPTAB_LIB_WIDEN); | |
4955 | /* No remainder function. Try a quotient-and-remainder | |
4956 | function, keeping the remainder. */ | |
4957 | if (!remainder) | |
4958 | { | |
4959 | remainder = gen_reg_rtx (compute_mode); | |
b8698a0f | 4960 | if (!expand_twoval_binop_libfunc |
b3f8d95d MM |
4961 | (unsignedp ? udivmod_optab : sdivmod_optab, |
4962 | op0, op1, | |
4963 | NULL_RTX, remainder, | |
4964 | unsignedp ? UMOD : MOD)) | |
4965 | remainder = NULL_RTX; | |
4966 | } | |
4967 | } | |
44037a66 TG |
4968 | else |
4969 | { | |
4970 | /* We divided. Now finish doing X - Y * (X / Y). */ | |
55c2d311 TG |
4971 | remainder = expand_mult (compute_mode, quotient, op1, |
4972 | NULL_RTX, unsignedp); | |
4973 | remainder = expand_binop (compute_mode, sub_optab, op0, | |
4974 | remainder, target, unsignedp, | |
4975 | OPTAB_LIB_WIDEN); | |
44037a66 TG |
4976 | } |
4977 | } | |
4978 | ||
55c2d311 | 4979 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
44037a66 TG |
4980 | } |
4981 | \f | |
4982 | /* Return a tree node with data type TYPE, describing the value of X. | |
4dfa0342 | 4983 | Usually this is an VAR_DECL, if there is no obvious better choice. |
44037a66 | 4984 | X may be an expression, however we only support those expressions |
6d2f8887 | 4985 | generated by loop.c. */ |
44037a66 TG |
4986 | |
4987 | tree | |
502b8322 | 4988 | make_tree (tree type, rtx x) |
44037a66 TG |
4989 | { |
4990 | tree t; | |
4991 | ||
4992 | switch (GET_CODE (x)) | |
4993 | { | |
4994 | case CONST_INT: | |
807e902e KZ |
4995 | case CONST_WIDE_INT: |
4996 | t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type))); | |
4997 | return t; | |
b8698a0f | 4998 | |
44037a66 | 4999 | case CONST_DOUBLE: |
807e902e KZ |
5000 | STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT); |
5001 | if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode) | |
5002 | t = wide_int_to_tree (type, | |
5003 | wide_int::from_array (&CONST_DOUBLE_LOW (x), 2, | |
5004 | HOST_BITS_PER_WIDE_INT * 2)); | |
44037a66 TG |
5005 | else |
5006 | { | |
5007 | REAL_VALUE_TYPE d; | |
5008 | ||
5009 | REAL_VALUE_FROM_CONST_DOUBLE (d, x); | |
5010 | t = build_real (type, d); | |
5011 | } | |
5012 | ||
5013 | return t; | |
69ef87e2 AH |
5014 | |
5015 | case CONST_VECTOR: | |
5016 | { | |
b8b7f162 RS |
5017 | int units = CONST_VECTOR_NUNITS (x); |
5018 | tree itype = TREE_TYPE (type); | |
d2a12ae7 | 5019 | tree *elts; |
b8b7f162 | 5020 | int i; |
69ef87e2 | 5021 | |
69ef87e2 | 5022 | /* Build a tree with vector elements. */ |
d2a12ae7 | 5023 | elts = XALLOCAVEC (tree, units); |
69ef87e2 AH |
5024 | for (i = units - 1; i >= 0; --i) |
5025 | { | |
b8b7f162 | 5026 | rtx elt = CONST_VECTOR_ELT (x, i); |
d2a12ae7 | 5027 | elts[i] = make_tree (itype, elt); |
69ef87e2 | 5028 | } |
c410d49e | 5029 | |
d2a12ae7 | 5030 | return build_vector (type, elts); |
69ef87e2 AH |
5031 | } |
5032 | ||
44037a66 | 5033 | case PLUS: |
4845b383 KH |
5034 | return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5035 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5036 | |
44037a66 | 5037 | case MINUS: |
4845b383 KH |
5038 | return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5039 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5040 | |
44037a66 | 5041 | case NEG: |
4845b383 | 5042 | return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); |
44037a66 TG |
5043 | |
5044 | case MULT: | |
4845b383 KH |
5045 | return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5046 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5047 | |
44037a66 | 5048 | case ASHIFT: |
4845b383 KH |
5049 | return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5050 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5051 | |
44037a66 | 5052 | case LSHIFTRT: |
ca5ba2a3 | 5053 | t = unsigned_type_for (type); |
aeba6c28 JM |
5054 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5055 | make_tree (t, XEXP (x, 0)), | |
5056 | make_tree (type, XEXP (x, 1)))); | |
c410d49e | 5057 | |
44037a66 | 5058 | case ASHIFTRT: |
12753674 | 5059 | t = signed_type_for (type); |
aeba6c28 JM |
5060 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5061 | make_tree (t, XEXP (x, 0)), | |
5062 | make_tree (type, XEXP (x, 1)))); | |
c410d49e | 5063 | |
44037a66 TG |
5064 | case DIV: |
5065 | if (TREE_CODE (type) != REAL_TYPE) | |
12753674 | 5066 | t = signed_type_for (type); |
44037a66 TG |
5067 | else |
5068 | t = type; | |
5069 | ||
aeba6c28 JM |
5070 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5071 | make_tree (t, XEXP (x, 0)), | |
5072 | make_tree (t, XEXP (x, 1)))); | |
44037a66 | 5073 | case UDIV: |
ca5ba2a3 | 5074 | t = unsigned_type_for (type); |
aeba6c28 JM |
5075 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5076 | make_tree (t, XEXP (x, 0)), | |
5077 | make_tree (t, XEXP (x, 1)))); | |
5c45425b RH |
5078 | |
5079 | case SIGN_EXTEND: | |
5080 | case ZERO_EXTEND: | |
ae2bcd98 RS |
5081 | t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), |
5082 | GET_CODE (x) == ZERO_EXTEND); | |
aeba6c28 | 5083 | return fold_convert (type, make_tree (t, XEXP (x, 0))); |
5c45425b | 5084 | |
84816907 JM |
5085 | case CONST: |
5086 | return make_tree (type, XEXP (x, 0)); | |
5087 | ||
5088 | case SYMBOL_REF: | |
5089 | t = SYMBOL_REF_DECL (x); | |
5090 | if (t) | |
5091 | return fold_convert (type, build_fold_addr_expr (t)); | |
5092 | /* else fall through. */ | |
5093 | ||
4dfa0342 | 5094 | default: |
c2255bc4 | 5095 | t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); |
d1608933 | 5096 | |
d4ebfa65 BE |
5097 | /* If TYPE is a POINTER_TYPE, we might need to convert X from |
5098 | address mode to pointer mode. */ | |
5ae6cd0d | 5099 | if (POINTER_TYPE_P (type)) |
d4ebfa65 BE |
5100 | x = convert_memory_address_addr_space |
5101 | (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); | |
d1608933 | 5102 | |
8a0aa06e RH |
5103 | /* Note that we do *not* use SET_DECL_RTL here, because we do not |
5104 | want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ | |
820cc88f | 5105 | t->decl_with_rtl.rtl = x; |
4dfa0342 | 5106 | |
44037a66 TG |
5107 | return t; |
5108 | } | |
5109 | } | |
44037a66 TG |
5110 | \f |
5111 | /* Compute the logical-and of OP0 and OP1, storing it in TARGET | |
5112 | and returning TARGET. | |
5113 | ||
5114 | If TARGET is 0, a pseudo-register or constant is returned. */ | |
5115 | ||
5116 | rtx | |
ef4bddc2 | 5117 | expand_and (machine_mode mode, rtx op0, rtx op1, rtx target) |
44037a66 | 5118 | { |
22273300 | 5119 | rtx tem = 0; |
44037a66 | 5120 | |
22273300 JJ |
5121 | if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) |
5122 | tem = simplify_binary_operation (AND, mode, op0, op1); | |
5123 | if (tem == 0) | |
44037a66 | 5124 | tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); |
44037a66 TG |
5125 | |
5126 | if (target == 0) | |
5127 | target = tem; | |
5128 | else if (tem != target) | |
5129 | emit_move_insn (target, tem); | |
5130 | return target; | |
5131 | } | |
495499da | 5132 | |
/* Helper function for emit_store_flag.  Try to emit insn pattern ICODE
   (a cstore pattern) for the comparison "X CODE Y", where X and Y are
   in MODE and are compared in COMPARE_MODE.  Store the result in TARGET
   (or a fresh pseudo of TARGET_MODE if TARGET is null), normalized as
   requested by NORMALIZEP (see emit_store_flag).  Return the result
   rtx, or NULL_RTX on failure; on failure every insn emitted here is
   deleted again.  */
rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
             machine_mode mode, machine_mode compare_mode,
             int unsignedp, rtx x, rtx y, int normalizep,
             machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  /* Mode in which this cstore pattern delivers its raw result.  */
  machine_mode result_mode = targetm.cstore_mode (icode);

  /* Remember where we are so everything emitted below can be deleted
     if the expansion fails part-way through.  */
  last = get_last_insn ();

  /* Massage X and Y into the form operands 2 and 3 of ICODE expect.  */
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    target_mode = result_mode;
  if (!target)
    target = gen_reg_rtx (target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  /* When optimizing, let the expander pick a fresh pseudo for the
     output instead of reusing TARGET (better CSE opportunities).  */
  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
                    val_signbit_known_clear_p (result_mode,
                                               STORE_FLAG_VALUE));
      op0 = target;
      result_mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
                        GET_MODE_BITSIZE (result_mode) - 1, subtarget,
                        normalizep == 1);
  else
    {
      /* Remaining case: isolate the low bit of the raw flag value,
         which requires that bit to distinguish true from false.  */
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
        op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5230 | ||
44037a66 | 5231 | |
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.  Arguments are as for emit_store_flag; returns the result
   rtx, or 0 if none of the tricks applied.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
                   machine_mode mode, int unsignedp, int normalizep,
                   machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;

  if (unsignedp)
    code = unsigned_condition (code);
  /* SCODE is the comparison with the operands exchanged, used for the
     reversed-operand retry on floating-point modes below.  */
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
        op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
        op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      rtx tem;
      if ((code == EQ || code == NE)
          && (op1 == const0_rtx || op1 == constm1_rtx))
        {
          rtx op00, op01;

          /* Do a logical OR or AND of the two words and compare the
             result.  */
          op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
          op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
          tem = expand_binop (word_mode,
                              op1 == const0_rtx ? ior_optab : and_optab,
                              op00, op01, NULL_RTX, unsignedp,
                              OPTAB_DIRECT);

          if (tem != 0)
            tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
                                   unsignedp, normalizep);
        }
      else if ((code == LT || code == GE) && op1 == const0_rtx)
        {
          rtx op0h;

          /* If testing the sign bit, can just test on high word.  */
          op0h = simplify_gen_subreg (word_mode, op0, mode,
                                      subreg_highpart_offset (word_mode,
                                                              mode));
          tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
                                 unsignedp, normalizep);
        }
      else
        tem = NULL_RTX;

      if (tem)
        {
          /* The single-word result may need widening to TARGET_MODE;
             extend sign- or zero- depending on the flag value's sign.  */
          if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
            return tem;
          if (!target)
            target = gen_reg_rtx (target_mode);

          convert_move (target, tem,
                        !val_signbit_known_set_p (word_mode,
                                                  (normalizep ? normalizep
                                                   : STORE_FLAG_VALUE)));
          return target;
        }
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
          || val_signbit_p (mode, STORE_FLAG_VALUE)))
    {
      subtarget = target;

      if (!target)
        target_mode = mode;

      /* If the result is to be wider than OP0, it is best to convert it
         first.  If it is to be narrower, it is *incorrect* to convert it
         first.  */
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
        {
          op0 = convert_modes (target_mode, mode, op0, 0);
          mode = target_mode;
        }

      if (target_mode != mode)
        subtarget = 0;

      if (code == GE)
        op0 = expand_unop (mode, one_cmpl_optab, op0,
                           ((STORE_FLAG_VALUE == 1 || normalizep)
                            ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
        /* If we are supposed to produce a 0/1 value, we want to do
           a logical shift from the sign bit to the low-order bit; for
           a -1/0 value, we do an arithmetic shift.  */
        op0 = expand_shift (RSHIFT_EXPR, mode, op0,
                            GET_MODE_BITSIZE (mode) - 1,
                            subtarget, normalizep != -1);

      if (mode != target_mode)
        op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }

  /* Finally, try a cstore pattern, first in MODE itself and then in
     successively wider modes of the same class (or CCmode for MODE_CC).  */
  mclass = GET_MODE_CLASS (mode);
  for (compare_mode = mode; compare_mode != VOIDmode;
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
    {
      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
        {
          do_pending_stack_adjust ();
          rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
                                 unsignedp, op0, op1, normalizep, target_mode);
          if (tem)
            return tem;

          /* For floating point, also try with operands swapped and the
             swapped condition.  */
          if (GET_MODE_CLASS (mode) == MODE_FLOAT)
            {
              tem = emit_cstore (target, icode, scode, mode, compare_mode,
                                 unsignedp, op1, op0, normalizep, target_mode);
              if (tem)
                return tem;
            }
          break;
        }
    }

  return 0;
}
5421 | ||
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  Normalize is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
                 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  /* First try the non-recursive tricks.  */
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
                           target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
        ;
      else
        return 0;
    }

  /* Rollback point for the speculative expansions below.  */
  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos */
  subtarget = (!optimize
               && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
          && (code == ORDERED || code == UNORDERED
              || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
              || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
        {
          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                          || (STORE_FLAG_VALUE == -1 && normalizep == 1));

          /* For the reverse comparison, use either an addition or a XOR.  */
          if (want_add
              && rtx_cost (GEN_INT (normalizep), PLUS, 1,
                           optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       STORE_FLAG_VALUE, target_mode);
              if (tem)
                return expand_binop (target_mode, add_optab, tem,
                                     gen_int_mode (normalizep, target_mode),
                                     target, 0, OPTAB_WIDEN);
            }
          else if (!want_add
                   && rtx_cost (trueval, XOR, 1,
                                optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       normalizep, target_mode);
              if (tem)
                return expand_binop (target_mode, xor_optab, tem, trueval,
                                     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
            }
        }

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
        return 0;

      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
         Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
        {
          gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
          return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
                                    target_mode);
        }

      if (!HAVE_conditional_move)
        return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
         conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
                               normalizep, target_mode);
      if (tem == 0)
        return 0;

      if (and_them)
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     tem, const0_rtx, GET_MODE (tem), 0);
      else
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
        delete_insns_since (last);
      return tem;
    }

  /* The remaining tricks only apply to integer comparisons.  */

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return 0;

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
                          OPTAB_WIDEN);

      if (tem == 0)
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
                            OPTAB_WIDEN);
      if (tem != 0)
        tem = emit_store_flag (target, code, tem, const0_rtx,
                               mode, unsignedp, normalizep);
      if (tem != 0)
        return tem;

      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
            && code == NE
            && GET_MODE_SIZE (mode) < UNITS_PER_WORD
            && op1 == const0_rtx))
    {
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
          && rtx_cost (GEN_INT (normalizep), PLUS, 1,
                       optimize_insn_for_speed_p ()) == 0)
        {
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                   STORE_FLAG_VALUE, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, add_optab, tem,
                                gen_int_mode (normalizep, target_mode),
                                target, 0, OPTAB_WIDEN);
        }
      else if (!want_add
               && rtx_cost (trueval, XOR, 1,
                            optimize_insn_for_speed_p ()) == 0)
        {
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                   normalizep, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
                                INTVAL (trueval) >= 0, OPTAB_WIDEN);
        }

      if (tem != 0)
        return tem;
      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST (optimize_insn_for_speed_p (),
                           false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
                          GET_MODE_BITSIZE (mode) - 1,
                          subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                          OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }

      /* If we couldn't do it that way, for NE we can "or" the two's complement
         of the value with itself.  For EQ, we take the one's complement of
         that "or", which is an extra insn, so we only handle EQ if branches
         are expensive.  */

      if (tem == 0
          && (code == NE
              || BRANCH_COST (optimize_insn_for_speed_p (),
                              false) > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }

  /* Shift the sign bit down to the low bit (0/1) or arithmetically
     (0/-1), as requested by NORMALIZEP.  */
  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
                        GET_MODE_BITSIZE (mode) - 1,
                        subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
        ;
      else if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}
04a8ee2f TG |
5765 | |
5766 | /* Like emit_store_flag, but always succeeds. */ | |
5767 | ||
5768 | rtx | |
502b8322 | 5769 | emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1, |
ef4bddc2 | 5770 | machine_mode mode, int unsignedp, int normalizep) |
04a8ee2f | 5771 | { |
f3f6fb16 DM |
5772 | rtx tem; |
5773 | rtx_code_label *label; | |
495499da | 5774 | rtx trueval, falseval; |
04a8ee2f TG |
5775 | |
5776 | /* First see if emit_store_flag can do the job. */ | |
5777 | tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep); | |
5778 | if (tem != 0) | |
5779 | return tem; | |
5780 | ||
495499da PB |
5781 | if (!target) |
5782 | target = gen_reg_rtx (word_mode); | |
04a8ee2f | 5783 | |
495499da PB |
5784 | /* If this failed, we have to do this with set/compare/jump/set code. |
5785 | For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */ | |
5786 | trueval = normalizep ? GEN_INT (normalizep) : const1_rtx; | |
b8698a0f | 5787 | if (code == NE |
495499da PB |
5788 | && GET_MODE_CLASS (mode) == MODE_INT |
5789 | && REG_P (target) | |
5790 | && op0 == target | |
5791 | && op1 == const0_rtx) | |
5792 | { | |
5793 | label = gen_label_rtx (); | |
1476d1bd MM |
5794 | do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode, |
5795 | NULL_RTX, NULL, label, -1); | |
495499da PB |
5796 | emit_move_insn (target, trueval); |
5797 | emit_label (label); | |
5798 | return target; | |
5799 | } | |
04a8ee2f | 5800 | |
f8cfc6aa | 5801 | if (!REG_P (target) |
04a8ee2f TG |
5802 | || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1)) |
5803 | target = gen_reg_rtx (GET_MODE (target)); | |
5804 | ||
495499da PB |
5805 | /* Jump in the right direction if the target cannot implement CODE |
5806 | but can jump on its reverse condition. */ | |
5807 | falseval = const0_rtx; | |
5808 | if (! can_compare_p (code, mode, ccp_jump) | |
5809 | && (! FLOAT_MODE_P (mode) | |
5810 | || code == ORDERED || code == UNORDERED | |
5811 | || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) | |
5812 | || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) | |
5813 | { | |
5814 | enum rtx_code rcode; | |
5815 | if (FLOAT_MODE_P (mode)) | |
5816 | rcode = reverse_condition_maybe_unordered (code); | |
5817 | else | |
5818 | rcode = reverse_condition (code); | |
5819 | ||
5820 | /* Canonicalize to UNORDERED for the libcall. */ | |
5821 | if (can_compare_p (rcode, mode, ccp_jump) | |
5822 | || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump))) | |
5823 | { | |
5824 | falseval = trueval; | |
5825 | trueval = const0_rtx; | |
5826 | code = rcode; | |
5827 | } | |
5828 | } | |
5829 | ||
5830 | emit_move_insn (target, trueval); | |
04a8ee2f | 5831 | label = gen_label_rtx (); |
1476d1bd MM |
5832 | do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL, |
5833 | label, -1); | |
04a8ee2f | 5834 | |
495499da | 5835 | emit_move_insn (target, falseval); |
44037a66 TG |
5836 | emit_label (label); |
5837 | ||
5838 | return target; | |
5839 | } | |
f5963e61 JL |
5840 | \f |
5841 | /* Perform possibly multi-word comparison and conditional jump to LABEL | |
feb04780 RS |
5842 | if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is |
5843 | now a thin wrapper around do_compare_rtx_and_jump. */ | |
f5963e61 JL |
5844 | |
5845 | static void | |
ef4bddc2 | 5846 | do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode, |
f3f6fb16 | 5847 | rtx_code_label *label) |
f5963e61 | 5848 | { |
feb04780 | 5849 | int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); |
1476d1bd MM |
5850 | do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX, |
5851 | NULL, label, -1); | |
f5963e61 | 5852 | } |