1 ;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
2
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
6 ;; any later version.
7
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 ;; for more details.
12
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
16
17 ;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
18
19 \f
20 ;; Define an insn type attribute. This is used in function unit delay
21 ;; computations.
22 ;; multi0 is a multiple insn rtl whose first insn is in pipe0
23 ;; multi1 is a multiple insn rtl whose first insn is in pipe1
24 (define_attr "type" "fx2,shuf,fx3,load,store,br,spr,lnop,nop,fxb,fp6,fp7,fpd,iprefetch,multi0,multi1,hbr,convert"
25 (const_string "fx2"))
26
27 ;; Length (in bytes).
28 (define_attr "length" ""
29 (const_int 4))
30
31 (define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune")))
32 ;; Processor type -- this attribute must exactly match the processor_type
33 ;; enumeration in spu.h.
34
35 (define_attr "cpu" "spu"
36 (const (symbol_ref "spu_cpu_attr")))
37
38 ; (define_function_unit NAME MULTIPLICITY SIMULTANEITY
39 ; TEST READY-DELAY ISSUE-DELAY [CONFLICT-LIST])
40
41 (define_cpu_unit "pipe0,pipe1,fp,ls")
42
43 (define_insn_reservation "NOP" 1 (eq_attr "type" "nop")
44 "pipe0")
45
46 (define_insn_reservation "FX2" 2 (eq_attr "type" "fx2")
47 "pipe0, nothing")
48
49 (define_insn_reservation "FX3" 4 (eq_attr "type" "fx3,fxb")
50 "pipe0, nothing*3")
51
52 (define_insn_reservation "FP6" 6 (eq_attr "type" "fp6")
53 "pipe0 + fp, nothing*5")
54
55 (define_insn_reservation "FP7" 7 (eq_attr "type" "fp7")
56 "pipe0, fp, nothing*5")
57
58 ;; The behavior of double-precision instructions is that both pipes stall
59 ;; for 6 cycles and the rest of the operation pipelines for
60 ;; 7 cycles. The simplest way to model this is to simply ignore
61 ;; the 6 cycle stall.
62 (define_insn_reservation "FPD" 7
63 (and (eq_attr "tune" "cell")
64 (eq_attr "type" "fpd"))
65 "pipe0 + pipe1, fp, nothing*5")
66
67 ;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined
68 (define_insn_reservation "FPD_CELLEDP" 9
69 (and (eq_attr "tune" "celledp")
70 (eq_attr "type" "fpd"))
71 "pipe0 + fp, nothing*8")
72
73 (define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop")
74 "pipe1")
75
76 (define_insn_reservation "STORE" 1 (eq_attr "type" "store")
77 "pipe1 + ls")
78
79 (define_insn_reservation "IPREFETCH" 1 (eq_attr "type" "iprefetch")
80 "pipe1 + ls")
81
82 (define_insn_reservation "SHUF" 4 (eq_attr "type" "shuf,br,spr")
83 "pipe1, nothing*3")
84
85 (define_insn_reservation "LOAD" 6 (eq_attr "type" "load")
86 "pipe1 + ls, nothing*5")
87
88 (define_insn_reservation "HBR" 18 (eq_attr "type" "hbr")
89 "pipe1, nothing*15")
90
91 (define_insn_reservation "MULTI0" 4 (eq_attr "type" "multi0")
92 "pipe0+pipe1, nothing*3")
93
94 (define_insn_reservation "MULTI1" 4 (eq_attr "type" "multi1")
95 "pipe1, nothing*3")
96
97 (define_insn_reservation "CONVERT" 0 (eq_attr "type" "convert")
98 "nothing")
99
100 ;; Force pipe0 to occur before pipe1 in a cycle.
101 (absence_set "pipe0" "pipe1")
102
103 \f
104 (define_c_enum "unspec" [
105 UNSPEC_IPREFETCH
106 UNSPEC_FREST
107 UNSPEC_FRSQEST
108 UNSPEC_FI
109 UNSPEC_EXTEND_CMP
110 UNSPEC_CG
111 UNSPEC_CGX
112 UNSPEC_ADDX
113 UNSPEC_BG
114 UNSPEC_BGX
115 UNSPEC_SFX
116 UNSPEC_FSM
117 UNSPEC_HBR
118 UNSPEC_NOP
119 UNSPEC_CONVERT
120 UNSPEC_SELB
121 UNSPEC_SHUFB
122 UNSPEC_CPAT
123 UNSPEC_CNTB
124 UNSPEC_SUMB
125 UNSPEC_FSMB
126 UNSPEC_FSMH
127 UNSPEC_GBB
128 UNSPEC_GBH
129 UNSPEC_GB
130 UNSPEC_AVGB
131 UNSPEC_ABSDB
132 UNSPEC_ORX
133 UNSPEC_HEQ
134 UNSPEC_HGT
135 UNSPEC_HLGT
136 UNSPEC_STOP
137 UNSPEC_STOPD
138 UNSPEC_SET_INTR
139 UNSPEC_FSCRRD
140 UNSPEC_FSCRWR
141 UNSPEC_MFSPR
142 UNSPEC_MTSPR
143 UNSPEC_RDCH
144 UNSPEC_RCHCNT
145 UNSPEC_WRCH
146 UNSPEC_SPU_REALIGN_LOAD
147 UNSPEC_SPU_MASK_FOR_LOAD
148 UNSPEC_DFTSV
149 UNSPEC_FLOAT_EXTEND
150 UNSPEC_FLOAT_TRUNCATE
151 UNSPEC_SP_SET
152 UNSPEC_SP_TEST
153 ])
154
155 (define_c_enum "unspecv" [
156 UNSPECV_BLOCKAGE
157 UNSPECV_LNOP
158 UNSPECV_NOP
159 UNSPECV_SYNC
160 ])
161
162 (include "predicates.md")
163 (include "constraints.md")
164
165 \f
166 ;; Mode iterators
167
168 (define_mode_iterator ALL [QI V16QI
169 HI V8HI
170 SI V4SI
171 DI V2DI
172 TI
173 SF V4SF
174 DF V2DF])
175
176 ; Everything except DI and TI, which are handled separately because
177 ; they need different constraints to correctly test VOIDmode constants.
178 (define_mode_iterator MOV [QI V16QI
179 HI V8HI
180 SI V4SI
181 V2DI
182 SF V4SF
183 DF V2DF])
184
185 (define_mode_iterator QHSI [QI HI SI])
186 (define_mode_iterator QHSDI [QI HI SI DI])
187 (define_mode_iterator DTI [DI TI])
188
189 (define_mode_iterator VINT [QI V16QI
190 HI V8HI
191 SI V4SI
192 DI V2DI
193 TI])
194
195 (define_mode_iterator VQHSI [QI V16QI
196 HI V8HI
197 SI V4SI])
198
199 (define_mode_iterator VHSI [HI V8HI
200 SI V4SI])
201
202 (define_mode_iterator VSDF [SF V4SF
203 DF V2DF])
204
205 (define_mode_iterator VSI [SI V4SI])
206 (define_mode_iterator VDI [DI V2DI])
207 (define_mode_iterator VSF [SF V4SF])
208 (define_mode_iterator VDF [DF V2DF])
209
210 (define_mode_iterator VCMP [V16QI
211 V8HI
212 V4SI
213 V4SF
214 V2DF])
215
216 (define_mode_iterator VCMPU [V16QI
217 V8HI
218 V4SI])
219
220 (define_mode_attr v [(V8HI "v") (V4SI "v")
221 (HI "") (SI "")])
222
223 (define_mode_attr bh [(QI "b") (V16QI "b")
224 (HI "h") (V8HI "h")
225 (SI "") (V4SI "")])
226
227 (define_mode_attr d [(SF "") (V4SF "")
228 (DF "d") (V2DF "d")])
229 (define_mode_attr d6 [(SF "6") (V4SF "6")
230 (DF "d") (V2DF "d")])
231
232 (define_mode_attr f2i [(SF "si") (V4SF "v4si")
233 (DF "di") (V2DF "v2di")])
234 (define_mode_attr F2I [(SF "SI") (V4SF "V4SI")
235 (DF "DI") (V2DF "V2DI")])
236 (define_mode_attr i2f [(SI "sf") (V4SI "v4sf")
237 (DI "df") (V2DI "v2df")])
238 (define_mode_attr I2F [(SI "SF") (V4SI "V4SF")
239 (DI "DF") (V2DI "V2DF")])
240
241 (define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")])
242
243 (define_mode_attr umask [(HI "f") (V8HI "f")
244 (SI "g") (V4SI "g")])
245 (define_mode_attr nmask [(HI "F") (V8HI "F")
246 (SI "G") (V4SI "G")])
247
248 ;; Used for carry and borrow instructions.
249 (define_mode_iterator CBOP [SI DI V4SI V2DI])
250
251 ;; Used in vec_set and vec_extract
252 (define_mode_iterator V [V2DI V4SI V8HI V16QI V2DF V4SF])
253 (define_mode_attr inner [(V16QI "QI")
254 (V8HI "HI")
255 (V4SI "SI")
256 (V2DI "DI")
257 (V4SF "SF")
258 (V2DF "DF")])
259 (define_mode_attr vmult [(V16QI "1")
260 (V8HI "2")
261 (V4SI "4")
262 (V2DI "8")
263 (V4SF "4")
264 (V2DF "8")])
265 (define_mode_attr voff [(V16QI "13")
266 (V8HI "14")
267 (V4SI "0")
268 (V2DI "0")
269 (V4SF "0")
270 (V2DF "0")])
271
272 \f
273 ;; mov
274
275 (define_expand "mov<mode>"
276 [(set (match_operand:ALL 0 "nonimmediate_operand" "")
277 (match_operand:ALL 1 "general_operand" ""))]
278 ""
279 {
280 if (spu_expand_mov(operands, <MODE>mode))
281 DONE;
282 })
283
284 (define_split
285 [(set (match_operand 0 "spu_reg_operand")
286 (match_operand 1 "immediate_operand"))]
287
288 ""
289 [(set (match_dup 0)
290 (high (match_dup 1)))
291 (set (match_dup 0)
292 (lo_sum (match_dup 0)
293 (match_dup 1)))]
294 {
295 if (spu_split_immediate (operands))
296 DONE;
297 FAIL;
298 })
299
300 (define_insn "pic"
301 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
302 (match_operand:SI 1 "immediate_operand" "s"))
303 (use (const_int 0))]
304 "flag_pic"
305 "ila\t%0,%%pic(%1)")
306
307 ;; Whenever a function generates the 'pic' pattern above we need to
308 ;; load the pic_offset_table register.
309 ;; GCC doesn't deal well with labels in the middle of a block so we
310 ;; hardcode the offsets in the asm here.
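;; Roughly: "ila %1,.+8" loads the assemble-time address of the point just
;; past the brsl, while "brsl %0,4" falls through to that same point and
;; records its run-time address in %0; the difference between the two is the
;; load-time displacement used to form the PIC base.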
311 (define_insn "load_pic_offset"
312 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
313 (unspec:SI [(const_int 0)] 0))
314 (set (match_operand:SI 1 "spu_reg_operand" "=r")
315 (unspec:SI [(const_int 0)] 0))]
316 "flag_pic"
317 "ila\t%1,.+8\;brsl\t%0,4"
318 [(set_attr "length" "8")
319 (set_attr "type" "multi0")])
320
321 \f
322 ;; move internal
323
324 (define_insn "_mov<mode>"
325 [(set (match_operand:MOV 0 "spu_dest_operand" "=r,r,r,r,r,m")
326 (match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))]
327 "register_operand(operands[0], <MODE>mode)
328 || register_operand(operands[1], <MODE>mode)"
329 "@
330 ori\t%0,%1,0
331 il%s1\t%0,%S1
332 fsmbi\t%0,%S1
333 c%s1d\t%0,%S1($sp)
334 lq%p1\t%0,%1
335 stq%p0\t%1,%0"
336 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
337
338 (define_insn "low_<mode>"
339 [(set (match_operand:VSI 0 "spu_reg_operand" "=r")
340 (lo_sum:VSI (match_operand:VSI 1 "spu_reg_operand" "0")
341 (match_operand:VSI 2 "immediate_operand" "i")))]
342 ""
343 "iohl\t%0,%2@l")
344
345 (define_insn "_movdi"
346 [(set (match_operand:DI 0 "spu_dest_operand" "=r,r,r,r,r,m")
347 (match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))]
348 "register_operand(operands[0], DImode)
349 || register_operand(operands[1], DImode)"
350 "@
351 ori\t%0,%1,0
352 il%d1\t%0,%D1
353 fsmbi\t%0,%D1
354 c%d1d\t%0,%D1($sp)
355 lq%p1\t%0,%1
356 stq%p0\t%1,%0"
357 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
358
359 (define_insn "_movti"
360 [(set (match_operand:TI 0 "spu_dest_operand" "=r,r,r,r,r,m")
361 (match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))]
362 "register_operand(operands[0], TImode)
363 || register_operand(operands[1], TImode)"
364 "@
365 ori\t%0,%1,0
366 il%t1\t%0,%T1
367 fsmbi\t%0,%T1
368 c%t1d\t%0,%T1($sp)
369 lq%p1\t%0,%1
370 stq%p0\t%1,%0"
371 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
372
373 (define_split
374 [(set (match_operand 0 "spu_reg_operand")
375 (match_operand 1 "memory_operand"))]
376 "GET_MODE_SIZE (GET_MODE (operands[0])) < 16
377 && GET_MODE(operands[0]) == GET_MODE(operands[1])
378 && !reload_in_progress && !reload_completed"
379 [(set (match_dup 0)
380 (match_dup 1))]
381 { if (spu_split_load(operands))
382 DONE;
383 })
384
385 (define_split
386 [(set (match_operand 0 "memory_operand")
387 (match_operand 1 "spu_reg_operand"))]
388 "GET_MODE_SIZE (GET_MODE (operands[0])) < 16
389 && GET_MODE(operands[0]) == GET_MODE(operands[1])
390 && !reload_in_progress && !reload_completed"
391 [(set (match_dup 0)
392 (match_dup 1))]
393 { if (spu_split_store(operands))
394 DONE;
395 })
396 ;; Operand 3 is the number of bytes. 1:b 2:h 4:w 8:d
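;; (The %M3 in the output templates below is used to pick the b/h/w/d form
;; of the generate-controls-for-insertion instructions from that byte count,
;; so a 4-byte request should come out as cwx or cwd.)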
397
398 (define_expand "cpat"
399 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
400 (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r")
401 (match_operand:SI 2 "spu_nonmem_operand" "r,n")
402 (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))]
403 ""
404 {
405 rtx x = gen_cpat_const (operands);
406 if (x)
407 {
408 emit_move_insn (operands[0], x);
409 DONE;
410 }
411 })
412
413 (define_insn "_cpat"
414 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
415 (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r")
416 (match_operand:SI 2 "spu_nonmem_operand" "r,n")
417 (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))]
418 ""
419 "@
420 c%M3x\t%0,%1,%2
421 c%M3d\t%0,%C2(%1)"
422 [(set_attr "type" "shuf")])
423
424 (define_split
425 [(set (match_operand:TI 0 "spu_reg_operand")
426 (unspec:TI [(match_operand:SI 1 "spu_nonmem_operand")
427 (match_operand:SI 2 "immediate_operand")
428 (match_operand:SI 3 "immediate_operand")] UNSPEC_CPAT))]
429 ""
430 [(set (match_dup:TI 0)
431 (match_dup:TI 4))]
432 {
433 operands[4] = gen_cpat_const (operands);
434 if (!operands[4])
435 FAIL;
436 })
437 \f
438 ;; extend
439
440 (define_insn "extendqihi2"
441 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
442 (sign_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))]
443 ""
444 "xsbh\t%0,%1")
445
446 (define_insn "extendhisi2"
447 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
448 (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))]
449 ""
450 "xshw\t%0,%1")
451
452 (define_expand "extendsidi2"
453 [(set (match_dup:DI 2)
454 (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "")))
455 (set (match_operand:DI 0 "spu_reg_operand" "")
456 (sign_extend:DI (vec_select:SI (match_dup:V2SI 3)
457 (parallel [(const_int 1)]))))]
458 ""
459 {
460 operands[2] = gen_reg_rtx (DImode);
461 operands[3] = spu_gen_subreg (V2SImode, operands[2]);
462 })
463
464 (define_insn "xswd"
465 [(set (match_operand:DI 0 "spu_reg_operand" "=r")
466 (sign_extend:DI
467 (vec_select:SI
468 (match_operand:V2SI 1 "spu_reg_operand" "r")
469 (parallel [(const_int 1) ]))))]
470 ""
471   "xswd\t%0,%1")
472
473 ;; By splitting this late we don't allow much opportunity for sharing of
474 ;; constants. That's ok because this should really be optimized away.
475 (define_insn_and_split "extend<mode>ti2"
476 [(set (match_operand:TI 0 "register_operand" "")
477 (sign_extend:TI (match_operand:QHSDI 1 "register_operand" "")))]
478 ""
479 "#"
480 ""
481 [(set (match_dup:TI 0)
482 (sign_extend:TI (match_dup:QHSDI 1)))]
483 {
484 spu_expand_sign_extend(operands);
485 DONE;
486 })
487
488 \f
489 ;; zero_extend
490
491 (define_insn "zero_extendqihi2"
492 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
493 (zero_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))]
494 ""
495 "andi\t%0,%1,0x00ff")
496
497 (define_insn "zero_extendqisi2"
498 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
499 (zero_extend:SI (match_operand:QI 1 "spu_reg_operand" "r")))]
500 ""
501 "andi\t%0,%1,0x00ff")
502
503 (define_expand "zero_extendhisi2"
504 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
505 (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))
506 (clobber (match_scratch:SI 2 "=&r"))]
507 ""
508 {
509 rtx mask = gen_reg_rtx (SImode);
510 rtx op1 = simplify_gen_subreg (SImode, operands[1], HImode, 0);
511 emit_move_insn (mask, GEN_INT (0xffff));
512 emit_insn (gen_andsi3(operands[0], op1, mask));
513 DONE;
514 })
515
516 (define_insn "zero_extendsidi2"
517 [(set (match_operand:DI 0 "spu_reg_operand" "=r")
518 (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "r")))]
519 ""
520 "rotqmbyi\t%0,%1,-4"
521 [(set_attr "type" "shuf")])
522
523 (define_insn "zero_extendqiti2"
524 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
525 (zero_extend:TI (match_operand:QI 1 "spu_reg_operand" "r")))]
526 ""
527 "andi\t%0,%1,0x00ff\;rotqmbyi\t%0,%0,-12"
528 [(set_attr "type" "multi0")
529 (set_attr "length" "8")])
530
531 (define_insn "zero_extendhiti2"
532 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
533 (zero_extend:TI (match_operand:HI 1 "spu_reg_operand" "r")))]
534 ""
535 "shli\t%0,%1,16\;rotqmbyi\t%0,%0,-14"
536 [(set_attr "type" "multi1")
537 (set_attr "length" "8")])
538
539 (define_insn "zero_extendsiti2"
540 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
541 (zero_extend:TI (match_operand:SI 1 "spu_reg_operand" "r")))]
542 ""
543 "rotqmbyi\t%0,%1,-12"
544 [(set_attr "type" "shuf")])
545
546 (define_insn "zero_extendditi2"
547 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
548 (zero_extend:TI (match_operand:DI 1 "spu_reg_operand" "r")))]
549 ""
550 "rotqmbyi\t%0,%1,-8"
551 [(set_attr "type" "shuf")])
552
553 \f
554 ;; trunc
555
556 (define_insn "truncdiqi2"
557 [(set (match_operand:QI 0 "spu_reg_operand" "=r")
558 (truncate:QI (match_operand:DI 1 "spu_reg_operand" "r")))]
559 ""
560 "shlqbyi\t%0,%1,4"
561 [(set_attr "type" "shuf")])
562
563 (define_insn "truncdihi2"
564 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
565 (truncate:HI (match_operand:DI 1 "spu_reg_operand" "r")))]
566 ""
567 "shlqbyi\t%0,%1,4"
568 [(set_attr "type" "shuf")])
569
570 (define_insn "truncdisi2"
571 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
572 (truncate:SI (match_operand:DI 1 "spu_reg_operand" "r")))]
573 ""
574 "shlqbyi\t%0,%1,4"
575 [(set_attr "type" "shuf")])
576
577 (define_insn "trunctiqi2"
578 [(set (match_operand:QI 0 "spu_reg_operand" "=r")
579 (truncate:QI (match_operand:TI 1 "spu_reg_operand" "r")))]
580 ""
581 "shlqbyi\t%0,%1,12"
582 [(set_attr "type" "shuf")])
583
584 (define_insn "trunctihi2"
585 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
586 (truncate:HI (match_operand:TI 1 "spu_reg_operand" "r")))]
587 ""
588 "shlqbyi\t%0,%1,12"
589 [(set_attr "type" "shuf")])
590
591 (define_insn "trunctisi2"
592 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
593 (truncate:SI (match_operand:TI 1 "spu_reg_operand" "r")))]
594 ""
595 "shlqbyi\t%0,%1,12"
596 [(set_attr "type" "shuf")])
597
598 (define_insn "trunctidi2"
599 [(set (match_operand:DI 0 "spu_reg_operand" "=r")
600 (truncate:DI (match_operand:TI 1 "spu_reg_operand" "r")))]
601 ""
602 "shlqbyi\t%0,%1,8"
603 [(set_attr "type" "shuf")])
604
605 \f
606 ;; float conversions
607
608 (define_insn "float<mode><i2f>2"
609 [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
610 (float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r")))]
611 ""
612 "csflt\t%0,%1,0"
613 [(set_attr "type" "fp7")])
614
615 (define_insn "fix_trunc<mode><f2i>2"
616 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
617 (fix:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")))]
618 ""
619 "cflts\t%0,%1,0"
620 [(set_attr "type" "fp7")])
621
622 (define_insn "floatuns<mode><i2f>2"
623 [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
624 (unsigned_float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r")))]
625 ""
626 "cuflt\t%0,%1,0"
627 [(set_attr "type" "fp7")])
628
629 (define_insn "fixuns_trunc<mode><f2i>2"
630 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
631 (unsigned_fix:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")))]
632 ""
633 "cfltu\t%0,%1,0"
634 [(set_attr "type" "fp7")])
635
636 (define_insn "float<mode><i2f>2_mul"
637 [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
638 (mult:<I2F> (float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
639 (match_operand:<I2F> 2 "spu_inv_exp2_operand" "w")))]
640 ""
641 "csflt\t%0,%1,%w2"
642 [(set_attr "type" "fp7")])
643
644 (define_insn "float<mode><i2f>2_div"
645 [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
646 (div:<I2F> (float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
647 (match_operand:<I2F> 2 "spu_exp2_operand" "v")))]
648 ""
649 "csflt\t%0,%1,%v2"
650 [(set_attr "type" "fp7")])
651
652
653 (define_insn "fix_trunc<mode><f2i>2_mul"
654 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
655 (fix:<F2I> (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
656 (match_operand:VSF 2 "spu_exp2_operand" "v"))))]
657 ""
658 "cflts\t%0,%1,%v2"
659 [(set_attr "type" "fp7")])
660
661 (define_insn "floatuns<mode><i2f>2_mul"
662 [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
663 (mult:<I2F> (unsigned_float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
664 (match_operand:<I2F> 2 "spu_inv_exp2_operand" "w")))]
665 ""
666 "cuflt\t%0,%1,%w2"
667 [(set_attr "type" "fp7")])
668
669 (define_insn "floatuns<mode><i2f>2_div"
670 [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
671 (div:<I2F> (unsigned_float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
672 (match_operand:<I2F> 2 "spu_exp2_operand" "v")))]
673 ""
674 "cuflt\t%0,%1,%v2"
675 [(set_attr "type" "fp7")])
676
677 (define_insn "fixuns_trunc<mode><f2i>2_mul"
678 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
679 (unsigned_fix:<F2I> (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
680 (match_operand:VSF 2 "spu_exp2_operand" "v"))))]
681 ""
682 "cfltu\t%0,%1,%v2"
683 [(set_attr "type" "fp7")])
684
685 (define_insn "extendsfdf2"
686 [(set (match_operand:DF 0 "spu_reg_operand" "=r")
687 (unspec:DF [(match_operand:SF 1 "spu_reg_operand" "r")]
688 UNSPEC_FLOAT_EXTEND))]
689 ""
690 "fesd\t%0,%1"
691 [(set_attr "type" "fpd")])
692
693 (define_insn "truncdfsf2"
694 [(set (match_operand:SF 0 "spu_reg_operand" "=r")
695 (unspec:SF [(match_operand:DF 1 "spu_reg_operand" "r")]
696 UNSPEC_FLOAT_TRUNCATE))]
697 ""
698 "frds\t%0,%1"
699 [(set_attr "type" "fpd")])
700
701 (define_expand "floatdisf2"
702 [(set (match_operand:SF 0 "register_operand" "")
703 (float:SF (match_operand:DI 1 "register_operand" "")))]
704 ""
705 {
706 rtx c0 = gen_reg_rtx (SImode);
707 rtx r0 = gen_reg_rtx (DImode);
708 rtx r1 = gen_reg_rtx (SFmode);
709 rtx r2 = gen_reg_rtx (SImode);
710 rtx setneg = gen_reg_rtx (SImode);
711 rtx isneg = gen_reg_rtx (SImode);
712 rtx neg = gen_reg_rtx (DImode);
713 rtx mask = gen_reg_rtx (DImode);
714
715 emit_move_insn (c0, GEN_INT (-0x80000000ll));
716
717 emit_insn (gen_negdi2 (neg, operands[1]));
718 emit_insn (gen_cgt_di_m1 (isneg, operands[1]));
719 emit_insn (gen_extend_compare (mask, isneg));
720 emit_insn (gen_selb (r0, neg, operands[1], mask));
721 emit_insn (gen_andc_si (setneg, c0, isneg));
722
723 emit_insn (gen_floatunsdisf2 (r1, r0));
724
725 emit_insn (gen_iorsi3 (r2, gen_rtx_SUBREG (SImode, r1, 0), setneg));
726 emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, r2, 0));
727 DONE;
728 })
729
730 (define_insn_and_split "floatunsdisf2"
731 [(set (match_operand:SF 0 "register_operand" "=r")
732 (unsigned_float:SF (match_operand:DI 1 "register_operand" "r")))
733 (clobber (match_scratch:SF 2 "=r"))
734 (clobber (match_scratch:SF 3 "=r"))
735 (clobber (match_scratch:SF 4 "=r"))]
736 ""
737 "#"
738 "reload_completed"
739 [(set (match_dup:SF 0)
740 (unsigned_float:SF (match_dup:DI 1)))]
741 {
742 rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (operands[1]));
743 rtx op2_v4sf = gen_rtx_REG (V4SFmode, REGNO (operands[2]));
744 rtx op2_ti = gen_rtx_REG (TImode, REGNO (operands[2]));
745 rtx op3_ti = gen_rtx_REG (TImode, REGNO (operands[3]));
746
747 REAL_VALUE_TYPE scale;
748 real_2expN (&scale, 32, SFmode);
749
750 emit_insn (gen_floatunsv4siv4sf2 (op2_v4sf, op1_v4si));
751 emit_insn (gen_shlqby_ti (op3_ti, op2_ti, GEN_INT (4)));
752
753 emit_move_insn (operands[4],
754 const_double_from_real_value (scale, SFmode));
755 emit_insn (gen_fmasf4 (operands[0],
756 operands[2], operands[4], operands[3]));
757 DONE;
758 })
759
760 (define_expand "floattisf2"
761 [(set (match_operand:SF 0 "register_operand" "")
762 (float:SF (match_operand:TI 1 "register_operand" "")))]
763 ""
764 {
765 rtx c0 = gen_reg_rtx (SImode);
766 rtx r0 = gen_reg_rtx (TImode);
767 rtx r1 = gen_reg_rtx (SFmode);
768 rtx r2 = gen_reg_rtx (SImode);
769 rtx setneg = gen_reg_rtx (SImode);
770 rtx isneg = gen_reg_rtx (SImode);
771 rtx neg = gen_reg_rtx (TImode);
772 rtx mask = gen_reg_rtx (TImode);
773
774 emit_move_insn (c0, GEN_INT (-0x80000000ll));
775
776 emit_insn (gen_negti2 (neg, operands[1]));
777 emit_insn (gen_cgt_ti_m1 (isneg, operands[1]));
778 emit_insn (gen_extend_compare (mask, isneg));
779 emit_insn (gen_selb (r0, neg, operands[1], mask));
780 emit_insn (gen_andc_si (setneg, c0, isneg));
781
782 emit_insn (gen_floatunstisf2 (r1, r0));
783
784 emit_insn (gen_iorsi3 (r2, gen_rtx_SUBREG (SImode, r1, 0), setneg));
785 emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, r2, 0));
786 DONE;
787 })
788
789 (define_insn_and_split "floatunstisf2"
790 [(set (match_operand:SF 0 "register_operand" "=r")
791 (unsigned_float:SF (match_operand:TI 1 "register_operand" "r")))
792 (clobber (match_scratch:SF 2 "=r"))
793 (clobber (match_scratch:SF 3 "=r"))
794 (clobber (match_scratch:SF 4 "=r"))]
795 ""
796 "#"
797 "reload_completed"
798 [(set (match_dup:SF 0)
799 (unsigned_float:SF (match_dup:TI 1)))]
800 {
801 rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (operands[1]));
802 rtx op2_v4sf = gen_rtx_REG (V4SFmode, REGNO (operands[2]));
803 rtx op2_ti = gen_rtx_REG (TImode, REGNO (operands[2]));
804 rtx op3_ti = gen_rtx_REG (TImode, REGNO (operands[3]));
805
806 REAL_VALUE_TYPE scale;
807 real_2expN (&scale, 32, SFmode);
808
809 emit_insn (gen_floatunsv4siv4sf2 (op2_v4sf, op1_v4si));
810 emit_insn (gen_shlqby_ti (op3_ti, op2_ti, GEN_INT (4)));
811
812 emit_move_insn (operands[4],
813 const_double_from_real_value (scale, SFmode));
814 emit_insn (gen_fmasf4 (operands[2],
815 operands[2], operands[4], operands[3]));
816
817 emit_insn (gen_shlqby_ti (op3_ti, op3_ti, GEN_INT (4)));
818 emit_insn (gen_fmasf4 (operands[2],
819 operands[2], operands[4], operands[3]));
820
821 emit_insn (gen_shlqby_ti (op3_ti, op3_ti, GEN_INT (4)));
822 emit_insn (gen_fmasf4 (operands[0],
823 operands[2], operands[4], operands[3]));
824 DONE;
825 })
826
827 ;; Do (double)(operands[1]+0x80000000u)-(double)0x80000000
828 (define_expand "floatsidf2"
829 [(set (match_operand:DF 0 "register_operand" "")
830 (float:DF (match_operand:SI 1 "register_operand" "")))]
831 ""
832 {
833 rtx c0 = gen_reg_rtx (SImode);
834 rtx c1 = gen_reg_rtx (DFmode);
835 rtx r0 = gen_reg_rtx (SImode);
836 rtx r1 = gen_reg_rtx (DFmode);
837
838 emit_move_insn (c0, GEN_INT (-0x80000000ll));
839 emit_move_insn (c1, spu_float_const ("2147483648", DFmode));
840 emit_insn (gen_xorsi3 (r0, operands[1], c0));
841 emit_insn (gen_floatunssidf2 (r1, r0));
842 emit_insn (gen_subdf3 (operands[0], r1, c1));
843 DONE;
844 })
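;; A minimal C sketch of the bias trick used above (illustration only, not
;; part of the port; the helper name is made up, and 32-bit int plus IEEE
;; double are assumed):
;;
;;   #include <stdint.h>
;;
;;   static double float_si_via_unsigned (int32_t x)
;;   {
;;     /* (uint32_t) x ^ 0x80000000u equals (uint32_t) x + 0x80000000u,
;;        mapping the signed range onto the unsigned range.  */
;;     uint32_t biased = (uint32_t) x ^ 0x80000000u;
;;     /* The biased value converts exactly; removing the 2^31 bias
;;        recovers (double) x.  */
;;     return (double) biased - 2147483648.0;
;;   }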
845
846 (define_expand "floatunssidf2"
847 [(set (match_operand:DF 0 "register_operand" "=r")
848 (unsigned_float:DF (match_operand:SI 1 "register_operand" "r")))]
849 ""
850 "{
851 rtx value;
852 rtx c0 = spu_const_from_ints (V16QImode, 0x02031011, 0x12138080,
853 0x06071415, 0x16178080);
854 rtx r0 = gen_reg_rtx (V16QImode);
855
856 if (optimize_size)
857 {
858 start_sequence ();
859 value =
860 emit_library_call_value (convert_optab_libfunc (ufloat_optab,
861 DFmode, SImode),
862 NULL_RTX, LCT_NORMAL, DFmode, 1, operands[1], SImode);
863 rtx_insn *insns = get_insns ();
864 end_sequence ();
865 emit_libcall_block (insns, operands[0], value,
866 gen_rtx_UNSIGNED_FLOAT (DFmode, operands[1]));
867 }
868 else
869 {
870 emit_move_insn (r0, c0);
871 emit_insn (gen_floatunssidf2_internal (operands[0], operands[1], r0));
872 }
873 DONE;
874 }")
875
876 (define_insn_and_split "floatunssidf2_internal"
877 [(set (match_operand:DF 0 "register_operand" "=r")
878 (unsigned_float:DF (match_operand:SI 1 "register_operand" "r")))
879 (use (match_operand:V16QI 2 "register_operand" "r"))
880 (clobber (match_scratch:V4SI 3 "=&r"))
881 (clobber (match_scratch:V4SI 4 "=&r"))
882 (clobber (match_scratch:V4SI 5 "=&r"))
883 (clobber (match_scratch:V4SI 6 "=&r"))]
884 ""
885 "clz\t%3,%1\;il\t%6,1023+31\;shl\t%4,%1,%3\;ceqi\t%5,%3,32\;sf\t%6,%3,%6\;a\t%4,%4,%4\;andc\t%6,%6,%5\;shufb\t%6,%6,%4,%2\;shlqbii\t%0,%6,4"
886 "reload_completed"
887 [(set (match_dup:DF 0)
888 (unsigned_float:DF (match_dup:SI 1)))]
889 "{
890 rtx *ops = operands;
891 rtx op1_v4si = gen_rtx_REG(V4SImode, REGNO(ops[1]));
892 rtx op0_ti = gen_rtx_REG (TImode, REGNO (ops[0]));
893 rtx op2_ti = gen_rtx_REG (TImode, REGNO (ops[2]));
894 rtx op6_ti = gen_rtx_REG (TImode, REGNO (ops[6]));
895 emit_insn (gen_clzv4si2 (ops[3],op1_v4si));
896 emit_move_insn (ops[6], spu_const (V4SImode, 1023+31));
897 emit_insn (gen_vashlv4si3 (ops[4],op1_v4si,ops[3]));
898 emit_insn (gen_ceq_v4si (ops[5],ops[3],spu_const (V4SImode, 32)));
899 emit_insn (gen_subv4si3 (ops[6],ops[6],ops[3]));
900 emit_insn (gen_addv4si3 (ops[4],ops[4],ops[4]));
901 emit_insn (gen_andc_v4si (ops[6],ops[6],ops[5]));
902 emit_insn (gen_shufb (ops[6],ops[6],ops[4],op2_ti));
903 emit_insn (gen_shlqbi_ti (op0_ti,op6_ti,GEN_INT(4)));
904 DONE;
905 }"
906 [(set_attr "length" "32")])
907
908 (define_expand "floatdidf2"
909 [(set (match_operand:DF 0 "register_operand" "")
910 (float:DF (match_operand:DI 1 "register_operand" "")))]
911 ""
912 {
913 rtx c0 = gen_reg_rtx (DImode);
914 rtx r0 = gen_reg_rtx (DImode);
915 rtx r1 = gen_reg_rtx (DFmode);
916 rtx r2 = gen_reg_rtx (DImode);
917 rtx setneg = gen_reg_rtx (DImode);
918 rtx isneg = gen_reg_rtx (SImode);
919 rtx neg = gen_reg_rtx (DImode);
920 rtx mask = gen_reg_rtx (DImode);
921
922 emit_move_insn (c0, GEN_INT (0x8000000000000000ull));
923
924 emit_insn (gen_negdi2 (neg, operands[1]));
925 emit_insn (gen_cgt_di_m1 (isneg, operands[1]));
926 emit_insn (gen_extend_compare (mask, isneg));
927 emit_insn (gen_selb (r0, neg, operands[1], mask));
928 emit_insn (gen_andc_di (setneg, c0, mask));
929
930 emit_insn (gen_floatunsdidf2 (r1, r0));
931
932 emit_insn (gen_iordi3 (r2, gen_rtx_SUBREG (DImode, r1, 0), setneg));
933 emit_move_insn (operands[0], gen_rtx_SUBREG (DFmode, r2, 0));
934 DONE;
935 })
936
937 (define_expand "floatunsdidf2"
938 [(set (match_operand:DF 0 "register_operand" "=r")
939 (unsigned_float:DF (match_operand:DI 1 "register_operand" "r")))]
940 ""
941 "{
942 rtx value;
943 rtx c0 = spu_const_from_ints (V16QImode, 0x02031011, 0x12138080,
944 0x06071415, 0x16178080);
945 rtx c1 = spu_const_from_ints (V4SImode, 1023+63, 1023+31, 0, 0);
946 rtx r0 = gen_reg_rtx (V16QImode);
947 rtx r1 = gen_reg_rtx (V4SImode);
948
949 if (optimize_size)
950 {
951 start_sequence ();
952 value =
953 emit_library_call_value (convert_optab_libfunc (ufloat_optab,
954 DFmode, DImode),
955 NULL_RTX, LCT_NORMAL, DFmode, 1, operands[1], DImode);
956 rtx_insn *insns = get_insns ();
957 end_sequence ();
958 emit_libcall_block (insns, operands[0], value,
959 gen_rtx_UNSIGNED_FLOAT (DFmode, operands[1]));
960 }
961 else
962 {
963 emit_move_insn (r1, c1);
964 emit_move_insn (r0, c0);
965 emit_insn (gen_floatunsdidf2_internal (operands[0], operands[1], r0, r1));
966 }
967 DONE;
968 }")
969
970 (define_insn_and_split "floatunsdidf2_internal"
971 [(set (match_operand:DF 0 "register_operand" "=r")
972 (unsigned_float:DF (match_operand:DI 1 "register_operand" "r")))
973 (use (match_operand:V16QI 2 "register_operand" "r"))
974 (use (match_operand:V4SI 3 "register_operand" "r"))
975 (clobber (match_scratch:V4SI 4 "=&r"))
976 (clobber (match_scratch:V4SI 5 "=&r"))
977 (clobber (match_scratch:V4SI 6 "=&r"))]
978 ""
979 "clz\t%4,%1\;shl\t%5,%1,%4\;ceqi\t%6,%4,32\;sf\t%4,%4,%3\;a\t%5,%5,%5\;andc\t%4,%4,%6\;shufb\t%4,%4,%5,%2\;shlqbii\t%4,%4,4\;shlqbyi\t%5,%4,8\;dfa\t%0,%4,%5"
980 "reload_completed"
981 [(set (match_operand:DF 0 "register_operand" "=r")
982 (unsigned_float:DF (match_operand:DI 1 "register_operand" "r")))]
983 "{
984 rtx *ops = operands;
985 rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO(ops[1]));
986 rtx op2_ti = gen_rtx_REG (TImode, REGNO(ops[2]));
987 rtx op4_ti = gen_rtx_REG (TImode, REGNO(ops[4]));
988 rtx op5_ti = gen_rtx_REG (TImode, REGNO(ops[5]));
989 rtx op4_df = gen_rtx_REG (DFmode, REGNO(ops[4]));
990 rtx op5_df = gen_rtx_REG (DFmode, REGNO(ops[5]));
991 emit_insn (gen_clzv4si2 (ops[4],op1_v4si));
992 emit_insn (gen_vashlv4si3 (ops[5],op1_v4si,ops[4]));
993 emit_insn (gen_ceq_v4si (ops[6],ops[4],spu_const (V4SImode, 32)));
994 emit_insn (gen_subv4si3 (ops[4],ops[3],ops[4]));
995 emit_insn (gen_addv4si3 (ops[5],ops[5],ops[5]));
996 emit_insn (gen_andc_v4si (ops[4],ops[4],ops[6]));
997 emit_insn (gen_shufb (ops[4],ops[4],ops[5],op2_ti));
998 emit_insn (gen_shlqbi_ti (op4_ti,op4_ti,GEN_INT(4)));
999 emit_insn (gen_shlqby_ti (op5_ti,op4_ti,GEN_INT(8)));
1000 emit_insn (gen_adddf3 (ops[0],op4_df,op5_df));
1001 DONE;
1002 }"
1003 [(set_attr "length" "40")])
1004
1005 \f
1006 ;; add
1007
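;; V16QI add is synthesized from halfword adds: lo_char gives the correct
;; low byte of each halfword (its carry only spills into the high byte),
;; hi_char adds with the rhs low bytes masked away so the high bytes see no
;; such spill, and selb merges the two under a 0x00ff mask.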
1008 (define_expand "addv16qi3"
1009 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
1010 (plus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")
1011 (match_operand:V16QI 2 "spu_reg_operand" "r")))]
1012 ""
1013 "{
1014 rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0);
1015 rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0);
1016 rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0);
1017 rtx rhs_and = gen_reg_rtx (V8HImode);
1018 rtx hi_char = gen_reg_rtx (V8HImode);
1019 rtx lo_char = gen_reg_rtx (V8HImode);
1020 rtx mask = gen_reg_rtx (V8HImode);
1021
1022 emit_move_insn (mask, spu_const (V8HImode, 0x00ff));
1023 emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00)));
1024 emit_insn (gen_addv8hi3 (hi_char, lhs_short, rhs_and));
1025 emit_insn (gen_addv8hi3 (lo_char, lhs_short, rhs_short));
1026 emit_insn (gen_selb (res_short, hi_char, lo_char, mask));
1027 DONE;
1028 }")
1029
1030 (define_insn "add<mode>3"
1031 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
1032 (plus:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
1033 (match_operand:VHSI 2 "spu_arith_operand" "r,B")))]
1034 ""
1035 "@
1036 a<bh>\t%0,%1,%2
1037 a<bh>i\t%0,%1,%2")
1038
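;; 64-bit add: cg produces per-word carries, the shufb pattern below moves
;; the carry generated by the low word of each doubleword into the high
;; word's slot (the 0x80 control bytes supply zeros), and addx adds the
;; operands together with that carry.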
1039 (define_expand "add<mode>3"
1040 [(set (match_dup:VDI 3)
1041 (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "")
1042 (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_CG))
1043 (set (match_dup:VDI 5)
1044 (unspec:VDI [(match_dup 3)
1045 (match_dup 3)
1046 (match_dup:TI 4)] UNSPEC_SHUFB))
1047 (set (match_operand:VDI 0 "spu_reg_operand" "")
1048 (unspec:VDI [(match_dup 1)
1049 (match_dup 2)
1050 (match_dup 5)] UNSPEC_ADDX))]
1051 ""
1052 {
1053 unsigned char pat[16] = {
1054 0x04, 0x05, 0x06, 0x07,
1055 0x80, 0x80, 0x80, 0x80,
1056 0x0c, 0x0d, 0x0e, 0x0f,
1057 0x80, 0x80, 0x80, 0x80
1058 };
1059 operands[3] = gen_reg_rtx (<MODE>mode);
1060 operands[4] = gen_reg_rtx (TImode);
1061 operands[5] = gen_reg_rtx (<MODE>mode);
1062 emit_move_insn (operands[4], array_to_constant (TImode, pat));
1063 })
1064
1065 (define_insn "cg_<mode>"
1066 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1067 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1068 (match_operand 2 "spu_reg_operand" "r")] UNSPEC_CG))]
1069 "operands != NULL"
1070 "cg\t%0,%1,%2")
1071
1072 (define_insn "cgx_<mode>"
1073 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1074 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1075 (match_operand 2 "spu_reg_operand" "r")
1076 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_CGX))]
1077 "operands != NULL"
1078 "cgx\t%0,%1,%2")
1079
1080 (define_insn "addx_<mode>"
1081 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1082 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1083 (match_operand 2 "spu_reg_operand" "r")
1084 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_ADDX))]
1085 "operands != NULL"
1086 "addx\t%0,%1,%2")
1087
1088
1089 ;; This is not the most efficient implementation of addti3.
1090 ;; We include it here because 1) the compiler needs it to be
1091 ;; defined since the word size is 128 bits and 2) sometimes gcc
1092 ;; substitutes an add for a constant left-shift.  2) is unlikely
1093 ;; because we also give addti3 a high cost.  In case gcc does
1094 ;; generate a TImode add, here is the code to do it.
1095 ;; Operand 2 is a nonmemory operand because the compiler requires it.
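;; The asm below ripples a carry across the four 32-bit words: cg produces
;; the word-wise carries, each shlqbyi shifts them up by one word (4 bytes)
;; so each carry lines up with the next more significant word, cgx
;; regenerates the carries taking the shifted carry-in into account, and the
;; final addx forms the sum.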
1096 (define_insn "addti3"
1097 [(set (match_operand:TI 0 "spu_reg_operand" "=&r")
1098 (plus:TI (match_operand:TI 1 "spu_reg_operand" "r")
1099 (match_operand:TI 2 "spu_nonmem_operand" "r")))
1100 (clobber (match_scratch:TI 3 "=&r"))]
1101 ""
1102 "cg\t%3,%1,%2\n\\
1103 shlqbyi\t%3,%3,4\n\\
1104 cgx\t%3,%1,%2\n\\
1105 shlqbyi\t%3,%3,4\n\\
1106 cgx\t%3,%1,%2\n\\
1107 shlqbyi\t%0,%3,4\n\\
1108 addx\t%0,%1,%2"
1109 [(set_attr "type" "multi0")
1110 (set_attr "length" "28")])
1111
1112 (define_insn "add<mode>3"
1113 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1114 (plus:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1115 (match_operand:VSF 2 "spu_reg_operand" "r")))]
1116 ""
1117 "fa\t%0,%1,%2"
1118 [(set_attr "type" "fp6")])
1119
1120 (define_insn "add<mode>3"
1121 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1122 (plus:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1123 (match_operand:VDF 2 "spu_reg_operand" "r")))]
1124 ""
1125 "dfa\t%0,%1,%2"
1126 [(set_attr "type" "fpd")])
1127
1128 \f
1129 ;; sub
1130
1131 (define_expand "subv16qi3"
1132 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
1133 (minus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")
1134 (match_operand:V16QI 2 "spu_reg_operand" "r")))]
1135 ""
1136 "{
1137 rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0);
1138 rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0);
1139 rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0);
1140 rtx rhs_and = gen_reg_rtx (V8HImode);
1141 rtx hi_char = gen_reg_rtx (V8HImode);
1142 rtx lo_char = gen_reg_rtx (V8HImode);
1143 rtx mask = gen_reg_rtx (V8HImode);
1144
1145 emit_move_insn (mask, spu_const (V8HImode, 0x00ff));
1146 emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00)));
1147 emit_insn (gen_subv8hi3 (hi_char, lhs_short, rhs_and));
1148 emit_insn (gen_subv8hi3 (lo_char, lhs_short, rhs_short));
1149 emit_insn (gen_selb (res_short, hi_char, lo_char, mask));
1150 DONE;
1151 }")
1152
1153 (define_insn "sub<mode>3"
1154 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
1155 (minus:VHSI (match_operand:VHSI 1 "spu_arith_operand" "r,B")
1156 (match_operand:VHSI 2 "spu_reg_operand" "r,r")))]
1157 ""
1158 "@
1159 sf<bh>\t%0,%2,%1
1160 sf<bh>i\t%0,%2,%1")
1161
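;; 64-bit subtract: bg produces per-word borrows, the shufb pattern moves
;; each doubleword's low-word borrow into the high word's slot (the 0xc0
;; control bytes supply 0xff, i.e. "no borrow in" for the low word), and
;; sfx completes the subtract with borrow.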
1162 (define_expand "sub<mode>3"
1163 [(set (match_dup:VDI 3)
1164 (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "")
1165 (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_BG))
1166 (set (match_dup:VDI 5)
1167 (unspec:VDI [(match_dup 3)
1168 (match_dup 3)
1169 (match_dup:TI 4)] UNSPEC_SHUFB))
1170 (set (match_operand:VDI 0 "spu_reg_operand" "")
1171 (unspec:VDI [(match_dup 1)
1172 (match_dup 2)
1173 (match_dup 5)] UNSPEC_SFX))]
1174 ""
1175 {
1176 unsigned char pat[16] = {
1177 0x04, 0x05, 0x06, 0x07,
1178 0xc0, 0xc0, 0xc0, 0xc0,
1179 0x0c, 0x0d, 0x0e, 0x0f,
1180 0xc0, 0xc0, 0xc0, 0xc0
1181 };
1182 operands[3] = gen_reg_rtx (<MODE>mode);
1183 operands[4] = gen_reg_rtx (TImode);
1184 operands[5] = gen_reg_rtx (<MODE>mode);
1185 emit_move_insn (operands[4], array_to_constant (TImode, pat));
1186 })
1187
1188 (define_insn "bg_<mode>"
1189 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1190 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1191 (match_operand 2 "spu_reg_operand" "r")] UNSPEC_BG))]
1192 "operands != NULL"
1193 "bg\t%0,%2,%1")
1194
1195 (define_insn "bgx_<mode>"
1196 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1197 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1198 (match_operand 2 "spu_reg_operand" "r")
1199 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_BGX))]
1200 "operands != NULL"
1201 "bgx\t%0,%2,%1")
1202
1203 (define_insn "sfx_<mode>"
1204 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1205 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1206 (match_operand 2 "spu_reg_operand" "r")
1207 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_SFX))]
1208 "operands != NULL"
1209 "sfx\t%0,%2,%1")
1210
1211 (define_insn "subti3"
1212 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
1213 (minus:TI (match_operand:TI 1 "spu_reg_operand" "r")
1214 (match_operand:TI 2 "spu_reg_operand" "r")))
1215 (clobber (match_scratch:TI 3 "=&r"))
1216 (clobber (match_scratch:TI 4 "=&r"))
1217 (clobber (match_scratch:TI 5 "=&r"))
1218 (clobber (match_scratch:TI 6 "=&r"))]
1219 ""
1220 "il\t%6,1\n\\
1221 bg\t%3,%2,%1\n\\
1222 xor\t%3,%3,%6\n\\
1223 sf\t%4,%2,%1\n\\
1224 shlqbyi\t%5,%3,4\n\\
1225 bg\t%3,%5,%4\n\\
1226 xor\t%3,%3,%6\n\\
1227 sf\t%4,%5,%4\n\\
1228 shlqbyi\t%5,%3,4\n\\
1229 bg\t%3,%5,%4\n\\
1230 xor\t%3,%3,%6\n\\
1231 sf\t%4,%5,%4\n\\
1232 shlqbyi\t%5,%3,4\n\\
1233 sf\t%0,%5,%4"
1234 [(set_attr "type" "multi0")
1235 (set_attr "length" "56")])
1236
1237 (define_insn "sub<mode>3"
1238 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1239 (minus:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1240 (match_operand:VSF 2 "spu_reg_operand" "r")))]
1241 ""
1242 "fs\t%0,%1,%2"
1243 [(set_attr "type" "fp6")])
1244
1245 (define_insn "sub<mode>3"
1246 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1247 (minus:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1248 (match_operand:VDF 2 "spu_reg_operand" "r")))]
1249 ""
1250 "dfs\t%0,%1,%2"
1251 [(set_attr "type" "fpd")])
1252
1253 \f
1254 ;; neg
1255
1256 (define_expand "negv16qi2"
1257 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
1258 (neg:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")))]
1259 ""
1260 "{
1261 rtx zero = gen_reg_rtx (V16QImode);
1262 emit_move_insn (zero, CONST0_RTX (V16QImode));
1263 emit_insn (gen_subv16qi3 (operands[0], zero, operands[1]));
1264 DONE;
1265 }")
1266
1267 (define_insn "neg<mode>2"
1268 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
1269 (neg:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")))]
1270 ""
1271 "sf<bh>i\t%0,%1,0")
1272
1273 (define_expand "negdi2"
1274 [(set (match_operand:DI 0 "spu_reg_operand" "")
1275 (neg:DI (match_operand:DI 1 "spu_reg_operand" "")))]
1276 ""
1277 {
1278 rtx zero = gen_reg_rtx(DImode);
1279 emit_move_insn(zero, GEN_INT(0));
1280 emit_insn (gen_subdi3(operands[0], zero, operands[1]));
1281 DONE;
1282 })
1283
1284 (define_expand "negti2"
1285 [(set (match_operand:TI 0 "spu_reg_operand" "")
1286 (neg:TI (match_operand:TI 1 "spu_reg_operand" "")))]
1287 ""
1288 {
1289 rtx zero = gen_reg_rtx(TImode);
1290 emit_move_insn(zero, GEN_INT(0));
1291 emit_insn (gen_subti3(operands[0], zero, operands[1]));
1292 DONE;
1293 })
1294
1295 (define_expand "neg<mode>2"
1296 [(parallel
1297 [(set (match_operand:VSF 0 "spu_reg_operand" "")
1298 (neg:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
1299 (use (match_dup 2))])]
1300 ""
1301 "operands[2] = gen_reg_rtx (<F2I>mode);
1302 emit_move_insn (operands[2], spu_const (<F2I>mode, -0x80000000ull));")
1303
1304 (define_expand "neg<mode>2"
1305 [(parallel
1306 [(set (match_operand:VDF 0 "spu_reg_operand" "")
1307 (neg:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
1308 (use (match_dup 2))])]
1309 ""
1310 "operands[2] = gen_reg_rtx (<F2I>mode);
1311 emit_move_insn (operands[2], spu_const (<F2I>mode, -0x8000000000000000ull));")
1312
1313 (define_insn_and_split "_neg<mode>2"
1314 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1315 (neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
1316 (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
1317 ""
1318 "#"
1319 ""
1320 [(set (match_dup:<F2I> 3)
1321 (xor:<F2I> (match_dup:<F2I> 4)
1322 (match_dup:<F2I> 2)))]
1323 {
1324 operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
1325 operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
1326 })
1327
1328 \f
1329 ;; abs
1330
1331 (define_expand "abs<mode>2"
1332 [(parallel
1333 [(set (match_operand:VSF 0 "spu_reg_operand" "")
1334 (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
1335 (use (match_dup 2))])]
1336 ""
1337 "operands[2] = gen_reg_rtx (<F2I>mode);
1338 emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffull));")
1339
1340 (define_expand "abs<mode>2"
1341 [(parallel
1342 [(set (match_operand:VDF 0 "spu_reg_operand" "")
1343 (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
1344 (use (match_dup 2))])]
1345 ""
1346 "operands[2] = gen_reg_rtx (<F2I>mode);
1347 emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffffffffffull));")
1348
1349 (define_insn_and_split "_abs<mode>2"
1350 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1351 (abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
1352 (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
1353 ""
1354 "#"
1355 ""
1356 [(set (match_dup:<F2I> 3)
1357 (and:<F2I> (match_dup:<F2I> 4)
1358 (match_dup:<F2I> 2)))]
1359 {
1360 operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
1361 operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
1362 })
1363
1364 \f
1365 ;; mul
1366
1367 (define_insn "mulhi3"
1368 [(set (match_operand:HI 0 "spu_reg_operand" "=r,r")
1369 (mult:HI (match_operand:HI 1 "spu_reg_operand" "r,r")
1370 (match_operand:HI 2 "spu_arith_operand" "r,B")))]
1371 ""
1372 "@
1373 mpy\t%0,%1,%2
1374 mpyi\t%0,%1,%2"
1375 [(set_attr "type" "fp7")])
1376
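;; V8HI multiply: the even and odd halfword lanes are widened and multiplied
;; separately, the even-lane products are shifted left so their low halves
;; land back in the even (upper) halfword positions, and selb merges them
;; with the odd-lane products under a 0x0000ffff mask.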
1377 (define_expand "mulv8hi3"
1378 [(set (match_operand:V8HI 0 "spu_reg_operand" "")
1379 (mult:V8HI (match_operand:V8HI 1 "spu_reg_operand" "")
1380 (match_operand:V8HI 2 "spu_reg_operand" "")))]
1381 ""
1382 "{
1383 rtx result = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1384 rtx low = gen_reg_rtx (V4SImode);
1385 rtx high = gen_reg_rtx (V4SImode);
1386 rtx shift = gen_reg_rtx (V4SImode);
1387 rtx mask = gen_reg_rtx (V4SImode);
1388
1389 emit_move_insn (mask, spu_const (V4SImode, 0x0000ffff));
1390 emit_insn (gen_vec_widen_smult_even_v8hi (high, operands[1], operands[2]));
1391 emit_insn (gen_vec_widen_smult_odd_v8hi (low, operands[1], operands[2]));
1392 emit_insn (gen_vashlv4si3 (shift, high, spu_const(V4SImode, 16)));
1393 emit_insn (gen_selb (result, shift, low, mask));
1394 DONE;
1395 }")
1396
1397 (define_expand "mul<mode>3"
1398 [(parallel
1399 [(set (match_operand:VSI 0 "spu_reg_operand" "")
1400 (mult:VSI (match_operand:VSI 1 "spu_reg_operand" "")
1401 (match_operand:VSI 2 "spu_reg_operand" "")))
1402 (clobber (match_dup:VSI 3))
1403 (clobber (match_dup:VSI 4))
1404 (clobber (match_dup:VSI 5))
1405 (clobber (match_dup:VSI 6))])]
1406 ""
1407 {
1408 operands[3] = gen_reg_rtx(<MODE>mode);
1409 operands[4] = gen_reg_rtx(<MODE>mode);
1410 operands[5] = gen_reg_rtx(<MODE>mode);
1411 operands[6] = gen_reg_rtx(<MODE>mode);
1412 })
1413
1414 (define_insn_and_split "_mulsi3"
1415 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1416 (mult:SI (match_operand:SI 1 "spu_reg_operand" "r")
1417 (match_operand:SI 2 "spu_arith_operand" "rK")))
1418 (clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))
1419 (clobber (match_operand:SI 4 "spu_reg_operand" "=&r"))
1420 (clobber (match_operand:SI 5 "spu_reg_operand" "=&r"))
1421 (clobber (match_operand:SI 6 "spu_reg_operand" "=&r"))]
1422 ""
1423 "#"
1424 ""
1425 [(set (match_dup:SI 0)
1426 (mult:SI (match_dup:SI 1)
1427 (match_dup:SI 2)))]
1428 {
1429 HOST_WIDE_INT val = 0;
1430 rtx a = operands[3];
1431 rtx b = operands[4];
1432 rtx c = operands[5];
1433 rtx d = operands[6];
1434 if (GET_CODE(operands[2]) == CONST_INT)
1435 {
1436 val = INTVAL(operands[2]);
1437 emit_move_insn(d, operands[2]);
1438 operands[2] = d;
1439 }
1440 if (val && (val & 0xffff) == 0)
1441 {
1442 emit_insn (gen_mpyh_si(operands[0], operands[2], operands[1]));
1443 }
1444 else if (val > 0 && val < 0x10000)
1445 {
1446 rtx cst = satisfies_constraint_K (GEN_INT (val)) ? GEN_INT(val) : d;
1447 emit_insn (gen_mpyh_si(a, operands[1], operands[2]));
1448 emit_insn (gen_mpyu_si(c, operands[1], cst));
1449 emit_insn (gen_addsi3(operands[0], a, c));
1450 }
1451 else
1452 {
1453 emit_insn (gen_mpyh_si(a, operands[1], operands[2]));
1454 emit_insn (gen_mpyh_si(b, operands[2], operands[1]));
1455 emit_insn (gen_mpyu_si(c, operands[1], operands[2]));
1456 emit_insn (gen_addsi3(d, a, b));
1457 emit_insn (gen_addsi3(operands[0], d, c));
1458 }
1459 DONE;
1460 })
1461
1462 (define_insn_and_split "_mulv4si3"
1463 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
1464 (mult:V4SI (match_operand:V4SI 1 "spu_reg_operand" "r")
1465 (match_operand:V4SI 2 "spu_reg_operand" "r")))
1466 (clobber (match_operand:V4SI 3 "spu_reg_operand" "=&r"))
1467 (clobber (match_operand:V4SI 4 "spu_reg_operand" "=&r"))
1468 (clobber (match_operand:V4SI 5 "spu_reg_operand" "=&r"))
1469 (clobber (match_operand:V4SI 6 "spu_reg_operand" "=&r"))]
1470 ""
1471 "#"
1472 ""
1473 [(set (match_dup:V4SI 0)
1474 (mult:V4SI (match_dup:V4SI 1)
1475 (match_dup:V4SI 2)))]
1476 {
1477 rtx a = operands[3];
1478 rtx b = operands[4];
1479 rtx c = operands[5];
1480 rtx d = operands[6];
1481 rtx op1 = simplify_gen_subreg (V8HImode, operands[1], V4SImode, 0);
1482 rtx op2 = simplify_gen_subreg (V8HImode, operands[2], V4SImode, 0);
1483 emit_insn (gen_spu_mpyh(a, op1, op2));
1484 emit_insn (gen_spu_mpyh(b, op2, op1));
1485 emit_insn (gen_vec_widen_umult_odd_v8hi (c, op1, op2));
1486 emit_insn (gen_addv4si3(d, a, b));
1487 emit_insn (gen_addv4si3(operands[0], d, c));
1488 DONE;
1489 })
1490
1491 (define_insn "mulhisi3"
1492 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1493 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1494 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))]
1495 ""
1496 "mpy\t%0,%1,%2"
1497 [(set_attr "type" "fp7")])
1498
1499 (define_insn "mulhisi3_imm"
1500 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1501 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1502 (match_operand:SI 2 "imm_K_operand" "K")))]
1503 ""
1504 "mpyi\t%0,%1,%2"
1505 [(set_attr "type" "fp7")])
1506
1507 (define_insn "umulhisi3"
1508 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1509 (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1510 (zero_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))]
1511 ""
1512 "mpyu\t%0,%1,%2"
1513 [(set_attr "type" "fp7")])
1514
1515 (define_insn "umulhisi3_imm"
1516 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1517 (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1518 (and:SI (match_operand:SI 2 "imm_K_operand" "K") (const_int 65535))))]
1519 ""
1520 "mpyui\t%0,%1,%2"
1521 [(set_attr "type" "fp7")])
1522
1523 (define_insn "mpyu_si"
1524 [(set (match_operand:SI 0 "spu_reg_operand" "=r,r")
1525 (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r,r")
1526 (const_int 65535))
1527 (and:SI (match_operand:SI 2 "spu_arith_operand" "r,K")
1528 (const_int 65535))))]
1529 ""
1530 "@
1531 mpyu\t%0,%1,%2
1532 mpyui\t%0,%1,%2"
1533 [(set_attr "type" "fp7")])
1534
1535 ;; This isn't always profitable to use.  Consider r = a * b + c * d.
1536 ;; It's faster to do the multiplies in parallel and then add them.  If we
1537 ;; merge a multiply and an add, it prevents the multiplies from happening
1538 ;; in parallel.
1539 (define_insn "mpya_si"
1540 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1541 (plus:SI (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1542 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r")))
1543 (match_operand:SI 3 "spu_reg_operand" "r")))]
1544 "0"
1545 "mpya\t%0,%1,%2,%3"
1546 [(set_attr "type" "fp7")])
1547
1548 (define_insn "mpyh_si"
1549 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1550 (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r")
1551 (const_int -65536))
1552 (and:SI (match_operand:SI 2 "spu_reg_operand" "r")
1553 (const_int 65535))))]
1554 ""
1555 "mpyh\t%0,%1,%2"
1556 [(set_attr "type" "fp7")])
1557
1558 (define_insn "mpys_si"
1559 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1560 (ashiftrt:SI
1561 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1562 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r")))
1563 (const_int 16)))]
1564 ""
1565 "mpys\t%0,%1,%2"
1566 [(set_attr "type" "fp7")])
1567
1568 (define_insn "mpyhh_si"
1569 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1570 (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1571 (const_int 16))
1572 (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1573 (const_int 16))))]
1574 ""
1575 "mpyhh\t%0,%1,%2"
1576 [(set_attr "type" "fp7")])
1577
1578 (define_insn "mpyhhu_si"
1579 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1580 (mult:SI (lshiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1581 (const_int 16))
1582 (lshiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1583 (const_int 16))))]
1584 ""
1585 "mpyhhu\t%0,%1,%2"
1586 [(set_attr "type" "fp7")])
1587
1588 (define_insn "mpyhha_si"
1589 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1590 (plus:SI (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1591 (const_int 16))
1592 (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1593 (const_int 16)))
1594 (match_operand:SI 3 "spu_reg_operand" "0")))]
1595 "0"
1596 "mpyhha\t%0,%1,%2"
1597 [(set_attr "type" "fp7")])
1598
1599 (define_insn "mul<mode>3"
1600 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1601 (mult:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")
1602 (match_operand:VSDF 2 "spu_reg_operand" "r")))]
1603 ""
1604 "<d>fm\t%0,%1,%2"
1605 [(set_attr "type" "fp<d6>")])
1606
1607 (define_insn "fma<mode>4"
1608 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1609 (fma:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1610 (match_operand:VSF 2 "spu_reg_operand" "r")
1611 (match_operand:VSF 3 "spu_reg_operand" "r")))]
1612 ""
1613 "fma\t%0,%1,%2,%3"
1614 [(set_attr "type" "fp6")])
1615
1616 ;; ??? The official description is (c - a*b), which is exactly (-a*b + c).
1617 ;; Note that this doesn't match the dfnms description. Incorrect?
1618 (define_insn "fnma<mode>4"
1619 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1620 (fma:VSF
1621 (neg:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
1622 (match_operand:VSF 2 "spu_reg_operand" "r")
1623 (match_operand:VSF 3 "spu_reg_operand" "r")))]
1624 ""
1625 "fnms\t%0,%1,%2,%3"
1626 [(set_attr "type" "fp6")])
1627
1628 (define_insn "fms<mode>4"
1629 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1630 (fma:VSF
1631 (match_operand:VSF 1 "spu_reg_operand" "r")
1632 (match_operand:VSF 2 "spu_reg_operand" "r")
1633 (neg:VSF (match_operand:VSF 3 "spu_reg_operand" "r"))))]
1634 ""
1635 "fms\t%0,%1,%2,%3"
1636 [(set_attr "type" "fp6")])
1637
1638 (define_insn "fma<mode>4"
1639 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1640 (fma:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1641 (match_operand:VDF 2 "spu_reg_operand" "r")
1642 (match_operand:VDF 3 "spu_reg_operand" "0")))]
1643 ""
1644 "dfma\t%0,%1,%2"
1645 [(set_attr "type" "fpd")])
1646
1647 (define_insn "fms<mode>4"
1648 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1649 (fma:VDF
1650 (match_operand:VDF 1 "spu_reg_operand" "r")
1651 (match_operand:VDF 2 "spu_reg_operand" "r")
1652 (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "0"))))]
1653 ""
1654 "dfms\t%0,%1,%2"
1655 [(set_attr "type" "fpd")])
1656
1657 (define_insn "nfma<mode>4"
1658 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1659 (neg:VDF
1660 (fma:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1661 (match_operand:VDF 2 "spu_reg_operand" "r")
1662 (match_operand:VDF 3 "spu_reg_operand" "0"))))]
1663 ""
1664 "dfnma\t%0,%1,%2"
1665 [(set_attr "type" "fpd")])
1666
1667 (define_insn "nfms<mode>4"
1668 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1669 (neg:VDF
1670 (fma:VDF
1671 (match_operand:VDF 1 "spu_reg_operand" "r")
1672 (match_operand:VDF 2 "spu_reg_operand" "r")
1673 (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "0")))))]
1674 ""
1675 "dfnms\t%0,%1,%2"
1676 [(set_attr "type" "fpd")])
1677
1678 ;; If signed zeros are ignored, -(a * b - c) = -a * b + c.
1679 (define_expand "fnma<mode>4"
1680 [(set (match_operand:VDF 0 "spu_reg_operand" "")
1681 (neg:VDF
1682 (fma:VDF
1683 (match_operand:VDF 1 "spu_reg_operand" "")
1684 (match_operand:VDF 2 "spu_reg_operand" "")
1685 (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "")))))]
1686 "!HONOR_SIGNED_ZEROS (<MODE>mode)"
1687 "")
1688
1689 ;; If signed zeros are ignored, -(a * b + c) = -a * b - c.
1690 (define_expand "fnms<mode>4"
1691 [(set (match_operand:VDF 0 "register_operand" "")
1692 (neg:VDF
1693 (fma:VDF
1694 (match_operand:VDF 1 "register_operand" "")
1695 (match_operand:VDF 2 "register_operand" "")
1696 (match_operand:VDF 3 "register_operand" ""))))]
1697 "!HONOR_SIGNED_ZEROS (<MODE>mode)"
1698 "")
1699 \f
1700 ;; mul highpart, used for divide by constant optimizations.
1701
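;; The expanders below assemble a 32x32->high-32 multiply from the 16-bit
;; multiply units by summing shifted partial products together with their
;; carries.  A minimal C sketch of the unsigned case (illustration only; the
;; helper name is made up, and the signed expander additionally corrects for
;; mpys treating both inputs as signed):
;;
;;   #include <stdint.h>
;;
;;   static uint32_t umul_highpart (uint32_t a, uint32_t b)
;;   {
;;     uint32_t al = a & 0xffff, ah = a >> 16;
;;     uint32_t bl = b & 0xffff, bh = b >> 16;
;;     uint32_t lo = al * bl, m1 = ah * bl, m2 = al * bh, hi = ah * bh;
;;     /* Add the cross products into the low word, tracking the carries.  */
;;     uint32_t t1 = lo + (m1 << 16);
;;     uint32_t carry = t1 < lo;
;;     uint32_t t2 = t1 + (m2 << 16);
;;     carry += t2 < t1;
;;     /* The high word collects hi, the upper halves of the cross
;;        products, and the carries out of the low word.  */
;;     return hi + (m1 >> 16) + (m2 >> 16) + carry;
;;   }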
1702 (define_expand "smulsi3_highpart"
1703 [(set (match_operand:SI 0 "register_operand" "")
1704 (truncate:SI
1705 (ashiftrt:DI
1706 (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
1707 (sign_extend:DI (match_operand:SI 2 "register_operand" "")))
1708 (const_int 32))))]
1709 ""
1710 {
1711 rtx t0 = gen_reg_rtx (SImode);
1712 rtx t1 = gen_reg_rtx (SImode);
1713 rtx t2 = gen_reg_rtx (SImode);
1714 rtx t3 = gen_reg_rtx (SImode);
1715 rtx t4 = gen_reg_rtx (SImode);
1716 rtx t5 = gen_reg_rtx (SImode);
1717 rtx t6 = gen_reg_rtx (SImode);
1718 rtx t7 = gen_reg_rtx (SImode);
1719 rtx t8 = gen_reg_rtx (SImode);
1720 rtx t9 = gen_reg_rtx (SImode);
1721 rtx t11 = gen_reg_rtx (SImode);
1722 rtx t12 = gen_reg_rtx (SImode);
1723 rtx t14 = gen_reg_rtx (SImode);
1724 rtx t15 = gen_reg_rtx (HImode);
1725 rtx t16 = gen_reg_rtx (HImode);
1726 rtx t17 = gen_reg_rtx (HImode);
1727 rtx t18 = gen_reg_rtx (HImode);
1728 rtx t19 = gen_reg_rtx (SImode);
1729 rtx t20 = gen_reg_rtx (SImode);
1730 rtx t21 = gen_reg_rtx (SImode);
1731 rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2);
1732 rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2);
1733 rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2);
1734 rtx t1_hi = gen_rtx_SUBREG (HImode, t1, 2);
1735
1736 rtx_insn *insn = emit_insn (gen_lshrsi3 (t0, operands[1], GEN_INT (16)));
1737 emit_insn (gen_lshrsi3 (t1, operands[2], GEN_INT (16)));
1738 emit_insn (gen_umulhisi3 (t2, op1_hi, op2_hi));
1739 emit_insn (gen_mpyh_si (t3, operands[1], operands[2]));
1740 emit_insn (gen_mpyh_si (t4, operands[2], operands[1]));
1741 emit_insn (gen_mpyhh_si (t5, operands[1], operands[2]));
1742 emit_insn (gen_mpys_si (t6, t0_hi, op2_hi));
1743 emit_insn (gen_mpys_si (t7, t1_hi, op1_hi));
1744
1745 /* Gen carry bits (in t9 and t11). */
1746 emit_insn (gen_addsi3 (t8, t2, t3));
1747 emit_insn (gen_cg_si (t9, t2, t3));
1748 emit_insn (gen_cg_si (t11, t8, t4));
1749
1750 /* Gen high 32 bits in operand[0]. Correct for mpys. */
1751 emit_insn (gen_addx_si (t12, t5, t6, t9));
1752 emit_insn (gen_addx_si (t14, t12, t7, t11));
1753
1754 /* mpys treats both operands as signed when we really want it to treat
1755 the first operand as signed and the second operand as unsigned.
1756 The code below corrects for that difference. */
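/* Concretely: when the operand that should be unsigned has its sign bit
   set, reading it as signed makes the product low by 2^16 times the other
   operand; after mpys' implicit shift that means the result is low by
   exactly that other operand.  The cgt/andc/extend/add sequence below adds
   it back for whichever of the two mpys results are affected.  */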
1757 emit_insn (gen_cgt_hi (t15, op1_hi, GEN_INT (-1)));
1758 emit_insn (gen_cgt_hi (t16, op2_hi, GEN_INT (-1)));
1759 emit_insn (gen_andc_hi (t17, t1_hi, t15));
1760 emit_insn (gen_andc_hi (t18, t0_hi, t16));
1761 emit_insn (gen_extendhisi2 (t19, t17));
1762 emit_insn (gen_extendhisi2 (t20, t18));
1763 emit_insn (gen_addsi3 (t21, t19, t20));
1764 emit_insn (gen_addsi3 (operands[0], t14, t21));
1765 unshare_all_rtl_in_chain (insn);
1766 DONE;
1767 })
1768
1769 (define_expand "umulsi3_highpart"
1770 [(set (match_operand:SI 0 "register_operand" "")
1771 (truncate:SI
1772 (ashiftrt:DI
1773 (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
1774 (zero_extend:DI (match_operand:SI 2 "register_operand" "")))
1775 (const_int 32))))]
1776 ""
1777
1778 {
1779 rtx t0 = gen_reg_rtx (SImode);
1780 rtx t1 = gen_reg_rtx (SImode);
1781 rtx t2 = gen_reg_rtx (SImode);
1782 rtx t3 = gen_reg_rtx (SImode);
1783 rtx t4 = gen_reg_rtx (SImode);
1784 rtx t5 = gen_reg_rtx (SImode);
1785 rtx t6 = gen_reg_rtx (SImode);
1786 rtx t7 = gen_reg_rtx (SImode);
1787 rtx t8 = gen_reg_rtx (SImode);
1788 rtx t9 = gen_reg_rtx (SImode);
1789 rtx t10 = gen_reg_rtx (SImode);
1790 rtx t12 = gen_reg_rtx (SImode);
1791 rtx t13 = gen_reg_rtx (SImode);
1792 rtx t14 = gen_reg_rtx (SImode);
1793 rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2);
1794 rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2);
1795 rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2);
1796
1797 rtx_insn *insn = emit_insn (gen_rotlsi3 (t0, operands[2], GEN_INT (16)));
1798 emit_insn (gen_umulhisi3 (t1, op1_hi, op2_hi));
1799 emit_insn (gen_umulhisi3 (t2, op1_hi, t0_hi));
1800 emit_insn (gen_mpyhhu_si (t3, operands[1], t0));
1801 emit_insn (gen_mpyhhu_si (t4, operands[1], operands[2]));
1802 emit_insn (gen_ashlsi3 (t5, t2, GEN_INT (16)));
1803 emit_insn (gen_ashlsi3 (t6, t3, GEN_INT (16)));
1804 emit_insn (gen_lshrsi3 (t7, t2, GEN_INT (16)));
1805 emit_insn (gen_lshrsi3 (t8, t3, GEN_INT (16)));
1806
1807 /* Gen carry bits (in t10 and t12). */
1808 emit_insn (gen_addsi3 (t9, t1, t5));
1809 emit_insn (gen_cg_si (t10, t1, t5));
1810 emit_insn (gen_cg_si (t12, t9, t6));
1811
1812 /* Gen high 32 bits in operand[0]. */
1813 emit_insn (gen_addx_si (t13, t4, t7, t10));
1814 emit_insn (gen_addx_si (t14, t13, t8, t12));
1815 emit_insn (gen_movsi (operands[0], t14));
1816 unshare_all_rtl_in_chain (insn);
1817
1818 DONE;
1819 })
1820 \f
1821 ;; div
1822
1823 ;; Not necessarily the best implementation of divide, but faster than
1824 ;; the default that gcc provides because this is inlined and it uses
1825 ;; clz.
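;; The asm below is a plain shift-and-subtract divider: heqi halts on a
;; zero divisor, both operands are made non-negative, the divisor is
;; aligned with the dividend using clz, and each iteration subtracts the
;; shifted divisor when it fits (clgt/sf/selb) while collecting a quotient
;; bit; the signs of quotient and remainder are fixed up afterwards.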
1826 (define_insn "divmodsi4"
1827 [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
1828 (div:SI (match_operand:SI 1 "spu_reg_operand" "r")
1829 (match_operand:SI 2 "spu_reg_operand" "r")))
1830 (set (match_operand:SI 3 "spu_reg_operand" "=&r")
1831 (mod:SI (match_dup 1)
1832 (match_dup 2)))
1833 (clobber (match_scratch:SI 4 "=&r"))
1834 (clobber (match_scratch:SI 5 "=&r"))
1835 (clobber (match_scratch:SI 6 "=&r"))
1836 (clobber (match_scratch:SI 7 "=&r"))
1837 (clobber (match_scratch:SI 8 "=&r"))
1838 (clobber (match_scratch:SI 9 "=&r"))
1839 (clobber (match_scratch:SI 10 "=&r"))
1840 (clobber (match_scratch:SI 11 "=&r"))
1841 (clobber (match_scratch:SI 12 "=&r"))
1842 (clobber (reg:SI 130))]
1843 ""
1844 "heqi %2,0\\n\\
1845 hbrr 3f,1f\\n\\
1846 sfi %8,%1,0\\n\\
1847 sfi %9,%2,0\\n\\
1848 cgti %10,%1,-1\\n\\
1849 cgti %11,%2,-1\\n\\
1850 selb %8,%8,%1,%10\\n\\
1851 selb %9,%9,%2,%11\\n\\
1852 clz %4,%8\\n\\
1853 clz %7,%9\\n\\
1854 il %5,1\\n\\
1855 fsmbi %0,0\\n\\
1856 sf %7,%4,%7\\n\\
1857 shlqbyi %3,%8,0\\n\\
1858 xor %11,%10,%11\\n\\
1859 shl %5,%5,%7\\n\\
1860 shl %4,%9,%7\\n\\
1861 lnop \\n\\
1862 1: or %12,%0,%5\\n\\
1863 rotqmbii %5,%5,-1\\n\\
1864 clgt %6,%4,%3\\n\\
1865 lnop \\n\\
1866 sf %7,%4,%3\\n\\
1867 rotqmbii %4,%4,-1\\n\\
1868 selb %0,%12,%0,%6\\n\\
1869 lnop \\n\\
1870 selb %3,%7,%3,%6\\n\\
1871 3: brnz %5,1b\\n\\
1872 2: sfi %8,%3,0\\n\\
1873 sfi %9,%0,0\\n\\
1874 selb %3,%8,%3,%10\\n\\
1875 selb %0,%0,%9,%11"
1876 [(set_attr "type" "multi0")
1877 (set_attr "length" "128")])
1878
1879 (define_insn "udivmodsi4"
1880 [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
1881 (udiv:SI (match_operand:SI 1 "spu_reg_operand" "r")
1882 (match_operand:SI 2 "spu_reg_operand" "r")))
1883 (set (match_operand:SI 3 "spu_reg_operand" "=&r")
1884 (umod:SI (match_dup 1)
1885 (match_dup 2)))
1886 (clobber (match_scratch:SI 4 "=&r"))
1887 (clobber (match_scratch:SI 5 "=&r"))
1888 (clobber (match_scratch:SI 6 "=&r"))
1889 (clobber (match_scratch:SI 7 "=&r"))
1890 (clobber (match_scratch:SI 8 "=&r"))
1891 (clobber (reg:SI 130))]
1892 ""
1893 "heqi %2,0\\n\\
1894 hbrr 3f,1f\\n\\
1895 clz %7,%2\\n\\
1896 clz %4,%1\\n\\
1897 il %5,1\\n\\
1898 fsmbi %0,0\\n\\
1899 sf %7,%4,%7\\n\\
1900 ori %3,%1,0\\n\\
1901 shl %5,%5,%7\\n\\
1902 shl %4,%2,%7\\n\\
1903 1: or %8,%0,%5\\n\\
1904 rotqmbii %5,%5,-1\\n\\
1905 clgt %6,%4,%3\\n\\
1906 lnop \\n\\
1907 sf %7,%4,%3\\n\\
1908 rotqmbii %4,%4,-1\\n\\
1909 selb %0,%8,%0,%6\\n\\
1910 lnop \\n\\
1911 selb %3,%7,%3,%6\\n\\
1912 3: brnz %5,1b\\n\\
1913 2:"
1914 [(set_attr "type" "multi0")
1915 (set_attr "length" "80")])
1916
1917 (define_expand "div<mode>3"
1918 [(parallel
1919 [(set (match_operand:VSF 0 "spu_reg_operand" "")
1920 (div:VSF (match_operand:VSF 1 "spu_reg_operand" "")
1921 (match_operand:VSF 2 "spu_reg_operand" "")))
1922 (clobber (match_scratch:VSF 3 ""))
1923 (clobber (match_scratch:VSF 4 ""))
1924 (clobber (match_scratch:VSF 5 ""))])]
1925 ""
1926 "")
1927
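;; Single-precision division is open coded: frest gives a reciprocal
;; estimate of operand 2, fi refines it, and a multiply plus an fnma/fma
;; pair apply one Newton-Raphson step to form the quotient.  The variant
;; below relies on -funsafe-math-optimizations; the one after it also
;; corrects the result by one ulp when needed.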
1928 (define_insn_and_split "*div<mode>3_fast"
1929 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1930 (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1931 (match_operand:VSF 2 "spu_reg_operand" "r")))
1932 (clobber (match_scratch:VSF 3 "=&r"))
1933 (clobber (match_scratch:VSF 4 "=&r"))
1934 (clobber (scratch:VSF))]
1935 "flag_unsafe_math_optimizations"
1936 "#"
1937 "reload_completed"
1938 [(set (match_dup:VSF 0)
1939 (div:VSF (match_dup:VSF 1)
1940 (match_dup:VSF 2)))
1941 (clobber (match_dup:VSF 3))
1942 (clobber (match_dup:VSF 4))
1943 (clobber (scratch:VSF))]
1944 {
1945 emit_insn (gen_frest_<mode>(operands[3], operands[2]));
1946 emit_insn (gen_fi_<mode>(operands[3], operands[2], operands[3]));
1947 emit_insn (gen_mul<mode>3(operands[4], operands[1], operands[3]));
1948 emit_insn (gen_fnma<mode>4(operands[0], operands[4], operands[2], operands[1]));
1949 emit_insn (gen_fma<mode>4(operands[0], operands[0], operands[3], operands[4]));
1950 DONE;
1951 })
1952
1953 (define_insn_and_split "*div<mode>3_adjusted"
1954 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1955 (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1956 (match_operand:VSF 2 "spu_reg_operand" "r")))
1957 (clobber (match_scratch:VSF 3 "=&r"))
1958 (clobber (match_scratch:VSF 4 "=&r"))
1959 (clobber (match_scratch:VSF 5 "=&r"))]
1960 "!flag_unsafe_math_optimizations"
1961 "#"
1962 "reload_completed"
1963 [(set (match_dup:VSF 0)
1964 (div:VSF (match_dup:VSF 1)
1965 (match_dup:VSF 2)))
1966 (clobber (match_dup:VSF 3))
1967 (clobber (match_dup:VSF 4))
1968 (clobber (match_dup:VSF 5))]
1969 {
1970 emit_insn (gen_frest_<mode> (operands[3], operands[2]));
1971 emit_insn (gen_fi_<mode> (operands[3], operands[2], operands[3]));
1972 emit_insn (gen_mul<mode>3 (operands[4], operands[1], operands[3]));
1973 emit_insn (gen_fnma<mode>4 (operands[5], operands[4], operands[2], operands[1]));
1974 emit_insn (gen_fma<mode>4 (operands[3], operands[5], operands[3], operands[4]));
1975
1976 /* Due to truncation error, the quotient result may be low by 1 ulp.
1977 Conditionally add one if the estimate is too small in magnitude. */
1978
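/* 0x80000000 is the float sign-bit mask and 0x3f800000 is 1.0f; the selb
   copies operand 1's sign onto 1.0 so the remainder computed below takes
   the sign of the dividend.  Adding 1 to the integer view of the estimate
   forms the next quotient up in magnitude, and the final cgt/selb keeps it
   only when its remainder is still non-negative.  */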
1979 emit_move_insn (gen_lowpart (<F2I>mode, operands[4]),
1980 spu_const (<F2I>mode, 0x80000000ULL));
1981 emit_move_insn (gen_lowpart (<F2I>mode, operands[5]),
1982 spu_const (<F2I>mode, 0x3f800000ULL));
1983 emit_insn (gen_selb (operands[5], operands[5], operands[1], operands[4]));
1984
1985 emit_insn (gen_add<f2i>3 (gen_lowpart (<F2I>mode, operands[4]),
1986 gen_lowpart (<F2I>mode, operands[3]),
1987 spu_const (<F2I>mode, 1)));
1988 emit_insn (gen_fnma<mode>4 (operands[0], operands[2], operands[4], operands[1]));
1989 emit_insn (gen_mul<mode>3 (operands[0], operands[0], operands[5]));
1990 emit_insn (gen_cgt_<f2i> (gen_lowpart (<F2I>mode, operands[0]),
1991 gen_lowpart (<F2I>mode, operands[0]),
1992 spu_const (<F2I>mode, -1)));
1993 emit_insn (gen_selb (operands[0], operands[3], operands[4], operands[0]));
1994 DONE;
1995 })
1996
1997 \f
1998 ;; sqrt
1999
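;; Same idea as division: frsqest/fi give y ~ 1/sqrt(a), x = a*y is the
;; first approximation, and the result is x + (k - x*y)*(x/2), i.e. one
;; Newton-Raphson step.  k is 1 + 2^-23 rather than exactly 1, apparently
;; to bias the truncated estimate upward.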
2000 (define_insn_and_split "sqrtsf2"
2001 [(set (match_operand:SF 0 "spu_reg_operand" "=r")
2002 (sqrt:SF (match_operand:SF 1 "spu_reg_operand" "r")))
2003 (clobber (match_scratch:SF 2 "=&r"))
2004 (clobber (match_scratch:SF 3 "=&r"))
2005 (clobber (match_scratch:SF 4 "=&r"))
2006 (clobber (match_scratch:SF 5 "=&r"))]
2007 ""
2008 "#"
2009 "reload_completed"
2010 [(set (match_dup:SF 0)
2011 (sqrt:SF (match_dup:SF 1)))
2012 (clobber (match_dup:SF 2))
2013 (clobber (match_dup:SF 3))
2014 (clobber (match_dup:SF 4))
2015 (clobber (match_dup:SF 5))]
2016 {
2017 emit_move_insn (operands[3],spu_float_const(\"0.5\",SFmode));
2018 emit_move_insn (operands[4],spu_float_const(\"1.00000011920928955078125\",SFmode));
2019 emit_insn (gen_frsqest_sf(operands[2],operands[1]));
2020 emit_insn (gen_fi_sf(operands[2],operands[1],operands[2]));
2021 emit_insn (gen_mulsf3(operands[5],operands[2],operands[1]));
2022 emit_insn (gen_mulsf3(operands[3],operands[5],operands[3]));
2023 emit_insn (gen_fnmasf4(operands[4],operands[2],operands[5],operands[4]));
2024 emit_insn (gen_fmasf4(operands[0],operands[4],operands[3],operands[5]));
2025 DONE;
2026 })
2027
2028 (define_insn "frest_<mode>"
2029 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
2030 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FREST))]
2031 ""
2032 "frest\t%0,%1"
2033 [(set_attr "type" "shuf")])
2034
2035 (define_insn "frsqest_<mode>"
2036 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
2037 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FRSQEST))]
2038 ""
2039 "frsqest\t%0,%1"
2040 [(set_attr "type" "shuf")])
2041
2042 (define_insn "fi_<mode>"
2043 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
2044 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")
2045 (match_operand:VSF 2 "spu_reg_operand" "r")] UNSPEC_FI))]
2046 ""
2047 "fi\t%0,%1,%2"
2048 [(set_attr "type" "fp7")])
2049
2050 \f
2051 ;; and
2052
2053 (define_insn "and<mode>3"
2054 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r")
2055 (and:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r")
2056 (match_operand:MOV 2 "spu_logical_operand" "r,C")))]
2057 ""
2058 "@
2059 and\t%0,%1,%2
2060 and%j2i\t%0,%1,%J2")
2061
2062 (define_insn "anddi3"
2063 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
2064 (and:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
2065 (match_operand:DI 2 "spu_logical_operand" "r,c")))]
2066 ""
2067 "@
2068 and\t%0,%1,%2
2069 and%k2i\t%0,%1,%K2")
2070
2071 (define_insn "andti3"
2072 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2073 (and:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2074 (match_operand:TI 2 "spu_logical_operand" "r,Y")))]
2075 ""
2076 "@
2077 and\t%0,%1,%2
2078 and%m2i\t%0,%1,%L2")
2079
2080 (define_insn "andc_<mode>"
2081 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2082 (and:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r"))
2083 (match_operand:ALL 1 "spu_reg_operand" "r")))]
2084 ""
2085 "andc\t%0,%1,%2")
2086
2087 (define_insn "nand_<mode>"
2088 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2089 (not:ALL (and:ALL (match_operand:ALL 2 "spu_reg_operand" "r")
2090 (match_operand:ALL 1 "spu_reg_operand" "r"))))]
2091 ""
2092 "nand\t%0,%1,%2")
2093
2094 \f
2095 ;; ior
2096
2097 (define_insn "ior<mode>3"
2098 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r,r")
2099 (ior:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r,0")
2100 (match_operand:MOV 2 "spu_ior_operand" "r,C,D")))]
2101 ""
2102 "@
2103 or\t%0,%1,%2
2104 or%j2i\t%0,%1,%J2
2105 iohl\t%0,%J2")
2106
2107 (define_insn "iordi3"
2108 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r,r")
2109 (ior:DI (match_operand:DI 1 "spu_reg_operand" "r,r,0")
2110 (match_operand:DI 2 "spu_ior_operand" "r,c,d")))]
2111 ""
2112 "@
2113 or\t%0,%1,%2
2114 or%k2i\t%0,%1,%K2
2115 iohl\t%0,%K2")
2116
2117 (define_insn "iorti3"
2118 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r,r")
2119 (ior:TI (match_operand:TI 1 "spu_reg_operand" "r,r,0")
2120 (match_operand:TI 2 "spu_ior_operand" "r,Y,Z")))]
2121 ""
2122 "@
2123 or\t%0,%1,%2
2124 or%m2i\t%0,%1,%L2
2125 iohl\t%0,%L2")
2126
2127 (define_insn "orc_<mode>"
2128 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2129 (ior:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r"))
2130 (match_operand:ALL 1 "spu_reg_operand" "r")))]
2131 ""
2132 "orc\t%0,%1,%2")
2133
2134 (define_insn "nor_<mode>"
2135 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2136 (not:ALL (ior:ALL (match_operand:ALL 1 "spu_reg_operand" "r")
2137 (match_operand:ALL 2 "spu_reg_operand" "r"))))]
2138 ""
2139 "nor\t%0,%1,%2")
2140 \f
2141 ;; xor
2142
2143 (define_insn "xor<mode>3"
2144 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r")
2145 (xor:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r")
2146 (match_operand:MOV 2 "spu_logical_operand" "r,B")))]
2147 ""
2148 "@
2149 xor\t%0,%1,%2
2150 xor%j2i\t%0,%1,%J2")
2151
2152 (define_insn "xordi3"
2153 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
2154 (xor:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
2155 (match_operand:DI 2 "spu_logical_operand" "r,c")))]
2156 ""
2157 "@
2158 xor\t%0,%1,%2
2159 xor%k2i\t%0,%1,%K2")
2160
2161 (define_insn "xorti3"
2162 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2163 (xor:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2164 (match_operand:TI 2 "spu_logical_operand" "r,Y")))]
2165 ""
2166 "@
2167 xor\t%0,%1,%2
2168 xor%m2i\t%0,%1,%L2")
2169
2170 (define_insn "eqv_<mode>"
2171 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2172 (not:ALL (xor:ALL (match_operand:ALL 1 "spu_reg_operand" "r")
2173 (match_operand:ALL 2 "spu_reg_operand" "r"))))]
2174 ""
2175 "eqv\t%0,%1,%2")
2176 \f
2177 ;; one_cmpl
2178
2179 (define_insn "one_cmpl<mode>2"
2180 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2181 (not:ALL (match_operand:ALL 1 "spu_reg_operand" "r")))]
2182 ""
2183 "nor\t%0,%1,%1")
2184
2185 \f
2186 ;; selb
2187
2188 (define_expand "selb"
2189 [(set (match_operand 0 "spu_reg_operand" "")
2190 (unspec [(match_operand 1 "spu_reg_operand" "")
2191 (match_operand 2 "spu_reg_operand" "")
2192 (match_operand 3 "spu_reg_operand" "")] UNSPEC_SELB))]
2193 ""
2194 {
2195 rtx s = gen__selb (operands[0], operands[1], operands[2], operands[3]);
2196 PUT_MODE (SET_SRC (s), GET_MODE (operands[0]));
2197 emit_insn (s);
2198 DONE;
2199 })
2200
2201 ;; This could be defined as a combination of logical operations, but at
2202 ;; one time it caused a crash due to recursive expansion of rtl during CSE.
2203 (define_insn "_selb"
2204 [(set (match_operand 0 "spu_reg_operand" "=r")
2205 (unspec [(match_operand 1 "spu_reg_operand" "r")
2206 (match_operand 2 "spu_reg_operand" "r")
2207 (match_operand 3 "spu_reg_operand" "r")] UNSPEC_SELB))]
2208 "GET_MODE(operands[0]) == GET_MODE(operands[1])
2209 && GET_MODE(operands[1]) == GET_MODE(operands[2])"
2210 "selb\t%0,%1,%2,%3")
2211
2212 \f
2213 ;; Misc. byte/bit operations
2214 ;; clz/ctz/ffs/popcount/parity
2215 ;; cntb/sumb
2216
2217 (define_insn "clz<mode>2"
2218 [(set (match_operand:VSI 0 "spu_reg_operand" "=r")
2219 (clz:VSI (match_operand:VSI 1 "spu_reg_operand" "r")))]
2220 ""
2221 "clz\t%0,%1")
2222
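;; ctz is 31 - clz (x & -x); ANDing x with its negation isolates the
;; lowest set bit.  ffs below uses the same trick with 32 - clz to get its
;; 1-based result.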
2223 (define_expand "ctz<mode>2"
2224 [(set (match_dup 2)
2225 (neg:VSI (match_operand:VSI 1 "spu_reg_operand" "")))
2226 (set (match_dup 3) (and:VSI (match_dup 1)
2227 (match_dup 2)))
2228 (set (match_dup 4) (clz:VSI (match_dup 3)))
2229 (set (match_operand:VSI 0 "spu_reg_operand" "")
2230 (minus:VSI (match_dup 5) (match_dup 4)))]
2231 ""
2232 {
2233 operands[2] = gen_reg_rtx (<MODE>mode);
2234 operands[3] = gen_reg_rtx (<MODE>mode);
2235 operands[4] = gen_reg_rtx (<MODE>mode);
2236 operands[5] = spu_const(<MODE>mode, 31);
2237 })
2238
2239 (define_expand "clrsb<mode>2"
2240 [(set (match_dup 2)
2241 (gt:VSI (match_operand:VSI 1 "spu_reg_operand" "") (match_dup 5)))
2242 (set (match_dup 3) (not:VSI (xor:VSI (match_dup 1) (match_dup 2))))
2243 (set (match_dup 4) (clz:VSI (match_dup 3)))
2244 (set (match_operand:VSI 0 "spu_reg_operand")
2245 (plus:VSI (match_dup 4) (match_dup 5)))]
2246 ""
2247 {
2248 operands[2] = gen_reg_rtx (<MODE>mode);
2249 operands[3] = gen_reg_rtx (<MODE>mode);
2250 operands[4] = gen_reg_rtx (<MODE>mode);
2251 operands[5] = spu_const(<MODE>mode, -1);
2252 })
2253
2254 (define_expand "ffs<mode>2"
2255 [(set (match_dup 2)
2256 (neg:VSI (match_operand:VSI 1 "spu_reg_operand" "")))
2257 (set (match_dup 3) (and:VSI (match_dup 1)
2258 (match_dup 2)))
2259 (set (match_dup 4) (clz:VSI (match_dup 3)))
2260 (set (match_operand:VSI 0 "spu_reg_operand" "")
2261 (minus:VSI (match_dup 5) (match_dup 4)))]
2262 ""
2263 {
2264 operands[2] = gen_reg_rtx (<MODE>mode);
2265 operands[3] = gen_reg_rtx (<MODE>mode);
2266 operands[4] = gen_reg_rtx (<MODE>mode);
2267 operands[5] = spu_const(<MODE>mode, 32);
2268 })
2269
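;; popcount: cntb counts the one bits in each byte, sumb adds the four
;; byte counts of the word, and parity below just keeps the low bit of
;; that count.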
2270 (define_expand "popcountsi2"
2271 [(set (match_dup 2)
2272 (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "")]
2273 UNSPEC_CNTB))
2274 (set (match_dup 3)
2275 (unspec:HI [(match_dup 2)] UNSPEC_SUMB))
2276 (set (match_operand:SI 0 "spu_reg_operand" "")
2277 (sign_extend:SI (match_dup 3)))]
2278 ""
2279 {
2280 operands[2] = gen_reg_rtx (SImode);
2281 operands[3] = gen_reg_rtx (HImode);
2282 })
2283
2284 (define_expand "paritysi2"
2285 [(set (match_operand:SI 0 "spu_reg_operand" "")
2286 (parity:SI (match_operand:SI 1 "spu_reg_operand" "")))]
2287 ""
2288 {
2289 operands[2] = gen_reg_rtx (SImode);
2290 emit_insn (gen_popcountsi2(operands[2], operands[1]));
2291 emit_insn (gen_andsi3(operands[0], operands[2], GEN_INT (1)));
2292 DONE;
2293 })
2294
2295 (define_insn "cntb_si"
2296 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2297 (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "r")]
2298 UNSPEC_CNTB))]
2299 ""
2300 "cntb\t%0,%1"
2301 [(set_attr "type" "fxb")])
2302
2303 (define_insn "cntb_v16qi"
2304 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
2305 (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r")]
2306 UNSPEC_CNTB))]
2307 ""
2308 "cntb\t%0,%1"
2309 [(set_attr "type" "fxb")])
2310
2311 (define_insn "sumb_si"
2312 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
2313 (unspec:HI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_SUMB))]
2314 ""
2315 "sumb\t%0,%1,%1"
2316 [(set_attr "type" "fxb")])
2317
2318 \f
2319 ;; ashl, vashl
2320
2321 (define_insn "<v>ashl<mode>3"
2322 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2323 (ashift:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2324 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
2325 ""
2326 "@
2327 shl<bh>\t%0,%1,%2
2328 shl<bh>i\t%0,%1,%<umask>2"
2329 [(set_attr "type" "fx3")])
2330
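;; DImode shifts are done in the full quadword: the value is rotated so
;; the doubleword sits in the low half and then shifted left by 64 + N
;; bits (shlqbybi/shlqbi for a register count, shlqby/shlqbi for a
;; constant), which puts the shifted doubleword back in the high half with
;; zeros filling in from below.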
2331 (define_insn_and_split "ashldi3"
2332 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
2333 (ashift:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
2334 (match_operand:SI 2 "spu_nonmem_operand" "r,I")))
2335 (clobber (match_scratch:SI 3 "=&r,X"))]
2336 ""
2337 "#"
2338 "reload_completed"
2339 [(set (match_dup:DI 0)
2340 (ashift:DI (match_dup:DI 1)
2341 (match_dup:SI 2)))]
2342 {
2343 rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0]));
2344 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
2345 rtx op2 = operands[2];
2346 rtx op3 = operands[3];
2347
2348 if (GET_CODE (operands[2]) == REG)
2349 {
2350 emit_insn (gen_addsi3 (op3, op2, GEN_INT (64)));
2351 emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64)));
2352 emit_insn (gen_shlqbybi_ti (op0, op0, op3));
2353 emit_insn (gen_shlqbi_ti (op0, op0, op3));
2354 }
2355 else
2356 {
2357 HOST_WIDE_INT val = INTVAL (operands[2]);
2358 emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64)));
2359 emit_insn (gen_shlqby_ti (op0, op0, GEN_INT (val / 8 + 8)));
2360 if (val % 8)
2361 emit_insn (gen_shlqbi_ti (op0, op0, GEN_INT (val % 8)));
2362 }
2363 DONE;
2364 })
2365
2366 (define_expand "ashlti3"
2367 [(parallel [(set (match_operand:TI 0 "spu_reg_operand" "")
2368 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "")
2369 (match_operand:SI 2 "spu_nonmem_operand" "")))
2370 (clobber (match_dup:TI 3))])]
2371 ""
2372 "if (GET_CODE (operands[2]) == CONST_INT)
2373 {
2374 emit_insn (gen_ashlti3_imm(operands[0], operands[1], operands[2]));
2375 DONE;
2376 }
2377 operands[3] = gen_reg_rtx (TImode);")
2378
2379 (define_insn_and_split "ashlti3_imm"
2380 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2381 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2382 (match_operand:SI 2 "immediate_operand" "O,P")))]
2383 ""
2384 "@
2385 shlqbyi\t%0,%1,%h2
2386 shlqbii\t%0,%1,%e2"
2387 "!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])"
2388 [(set (match_dup:TI 0)
2389 (ashift:TI (match_dup:TI 1)
2390 (match_dup:SI 3)))
2391 (set (match_dup:TI 0)
2392 (ashift:TI (match_dup:TI 0)
2393 (match_dup:SI 4)))]
2394 {
2395 HOST_WIDE_INT val = INTVAL(operands[2]);
2396 operands[3] = GEN_INT (val&7);
2397 operands[4] = GEN_INT (val&-8);
2398 }
2399 [(set_attr "type" "shuf,shuf")])
2400
2401 (define_insn_and_split "ashlti3_reg"
2402 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2403 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r")
2404 (match_operand:SI 2 "spu_reg_operand" "r")))
2405 (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))]
2406 ""
2407 "#"
2408 ""
2409 [(set (match_dup:TI 3)
2410 (ashift:TI (match_dup:TI 1)
2411 (and:SI (match_dup:SI 2)
2412 (const_int 7))))
2413 (set (match_dup:TI 0)
2414 (ashift:TI (match_dup:TI 3)
2415 (and:SI (match_dup:SI 2)
2416 (const_int -8))))]
2417 "")
2418
2419 (define_insn "shlqbybi_ti"
2420 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2421 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2422 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2423 (const_int -8))))]
2424 ""
2425 "@
2426 shlqbybi\t%0,%1,%2
2427 shlqbyi\t%0,%1,%h2"
2428 [(set_attr "type" "shuf,shuf")])
2429
2430 (define_insn "shlqbi_ti"
2431 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2432 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2433 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2434 (const_int 7))))]
2435 ""
2436 "@
2437 shlqbi\t%0,%1,%2
2438 shlqbii\t%0,%1,%e2"
2439 [(set_attr "type" "shuf,shuf")])
2440
2441 (define_insn "shlqby_ti"
2442 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2443 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2444 (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2445 (const_int 8))))]
2446 ""
2447 "@
2448 shlqby\t%0,%1,%2
2449 shlqbyi\t%0,%1,%f2"
2450 [(set_attr "type" "shuf,shuf")])
2451
2452 \f
2453 ;; lshr, vlshr
2454
2455 (define_insn_and_split "<v>lshr<mode>3"
2456 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2457 (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2458 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
2459 (clobber (match_scratch:VHSI 3 "=&r,X"))]
2460 ""
2461 "@
2462 #
2463 rot<bh>mi\t%0,%1,-%<umask>2"
2464 "reload_completed && GET_CODE (operands[2]) == REG"
2465 [(set (match_dup:VHSI 3)
2466 (neg:VHSI (match_dup:VHSI 2)))
2467 (set (match_dup:VHSI 0)
2468 (lshiftrt:VHSI (match_dup:VHSI 1)
2469 (neg:VHSI (match_dup:VHSI 3))))]
2470 ""
2471 [(set_attr "type" "*,fx3")])
2472
2473 (define_insn "<v>lshr<mode>3_imm"
2474 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
2475 (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")
2476 (match_operand:VHSI 2 "immediate_operand" "W")))]
2477 ""
2478 "rot<bh>mi\t%0,%1,-%<umask>2"
2479 [(set_attr "type" "fx3")])
2480
2481 (define_insn "rotm_<mode>"
2482 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2483 (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2484 (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))]
2485 ""
2486 "@
2487 rot<bh>m\t%0,%1,%2
2488 rot<bh>mi\t%0,%1,-%<nmask>2"
2489 [(set_attr "type" "fx3")])
2490
2491 (define_insn_and_split "lshr<mode>3"
2492 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r,r")
2493 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r,r")
2494 (match_operand:SI 2 "spu_nonmem_operand" "r,O,P")))]
2495 ""
2496 "@
2497 #
2498 rotqmbyi\t%0,%1,-%h2
2499 rotqmbii\t%0,%1,-%e2"
2500 "REG_P (operands[2]) || (!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2]))"
2501 [(set (match_dup:DTI 3)
2502 (lshiftrt:DTI (match_dup:DTI 1)
2503 (match_dup:SI 4)))
2504 (set (match_dup:DTI 0)
2505 (lshiftrt:DTI (match_dup:DTI 3)
2506 (match_dup:SI 5)))]
2507 {
2508 operands[3] = gen_reg_rtx (<MODE>mode);
2509 if (GET_CODE (operands[2]) == CONST_INT)
2510 {
2511 HOST_WIDE_INT val = INTVAL(operands[2]);
2512 operands[4] = GEN_INT (val & 7);
2513 operands[5] = GEN_INT (val & -8);
2514 }
2515 else
2516 {
2517 rtx t0 = gen_reg_rtx (SImode);
2518 rtx t1 = gen_reg_rtx (SImode);
2519 emit_insn (gen_subsi3(t0, GEN_INT(0), operands[2]));
2520 emit_insn (gen_subsi3(t1, GEN_INT(7), operands[2]));
2521 operands[4] = gen_rtx_AND (SImode, gen_rtx_NEG (SImode, t0), GEN_INT (7));
2522 operands[5] = gen_rtx_AND (SImode, gen_rtx_NEG (SImode, gen_rtx_AND (SImode, t1, GEN_INT (-8))), GEN_INT (-8));
2523 }
2524 }
2525 [(set_attr "type" "*,shuf,shuf")])
2526
2527 (define_expand "shrqbybi_<mode>"
2528 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2529 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2530 (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2531 (const_int -8)))
2532 (const_int -8))))]
2533 ""
2534 {
2535 if (GET_CODE (operands[2]) == CONST_INT)
2536 operands[2] = GEN_INT (7 - INTVAL (operands[2]));
2537 else
2538 {
2539 rtx t0 = gen_reg_rtx (SImode);
2540 emit_insn (gen_subsi3 (t0, GEN_INT (7), operands[2]));
2541 operands[2] = t0;
2542 }
2543 })
2544
2545 (define_insn "rotqmbybi_<mode>"
2546 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2547 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2548 (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2549 (const_int -8)))
2550 (const_int -8))))]
2551 ""
2552 "@
2553 rotqmbybi\t%0,%1,%2
2554 rotqmbyi\t%0,%1,-%H2"
2555 [(set_attr "type" "shuf")])
2556
2557 (define_insn_and_split "shrqbi_<mode>"
2558 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2559 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2560 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2561 (const_int 7))))
2562 (clobber (match_scratch:SI 3 "=&r,X"))]
2563 ""
2564 "#"
2565 "reload_completed"
2566 [(set (match_dup:DTI 0)
2567 (lshiftrt:DTI (match_dup:DTI 1)
2568 (and:SI (neg:SI (match_dup:SI 3)) (const_int 7))))]
2569 {
2570 if (GET_CODE (operands[2]) == CONST_INT)
2571 operands[3] = GEN_INT (-INTVAL (operands[2]));
2572 else
2573 emit_insn (gen_subsi3 (operands[3], GEN_INT (0), operands[2]));
2574 }
2575 [(set_attr "type" "shuf")])
2576
2577 (define_insn "rotqmbi_<mode>"
2578 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2579 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2580 (and:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
2581 (const_int 7))))]
2582 ""
2583 "@
2584 rotqmbi\t%0,%1,%2
2585 rotqmbii\t%0,%1,-%E2"
2586 [(set_attr "type" "shuf")])
2587
2588 (define_expand "shrqby_<mode>"
2589 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2590 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2591 (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
2592 (const_int 8))))]
2593 ""
2594 {
2595 if (GET_CODE (operands[2]) == CONST_INT)
2596 operands[2] = GEN_INT (-INTVAL (operands[2]));
2597 else
2598 {
2599 rtx t0 = gen_reg_rtx (SImode);
2600 emit_insn (gen_subsi3 (t0, GEN_INT (0), operands[2]));
2601 operands[2] = t0;
2602 }
2603 })
2604
2605 (define_insn "rotqmby_<mode>"
2606 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2607 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2608 (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
2609 (const_int 8))))]
2610 ""
2611 "@
2612 rotqmby\t%0,%1,%2
2613 rotqmbyi\t%0,%1,-%F2"
2614 [(set_attr "type" "shuf")])
2615
2616 \f
2617 ;; ashr, vashr
2618
2619 (define_insn_and_split "<v>ashr<mode>3"
2620 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2621 (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2622 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
2623 (clobber (match_scratch:VHSI 3 "=&r,X"))]
2624 ""
2625 "@
2626 #
2627 rotma<bh>i\t%0,%1,-%<umask>2"
2628 "reload_completed && GET_CODE (operands[2]) == REG"
2629 [(set (match_dup:VHSI 3)
2630 (neg:VHSI (match_dup:VHSI 2)))
2631 (set (match_dup:VHSI 0)
2632 (ashiftrt:VHSI (match_dup:VHSI 1)
2633 (neg:VHSI (match_dup:VHSI 3))))]
2634 ""
2635 [(set_attr "type" "*,fx3")])
2636
2637 (define_insn "<v>ashr<mode>3_imm"
2638 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
2639 (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")
2640 (match_operand:VHSI 2 "immediate_operand" "W")))]
2641 ""
2642 "rotma<bh>i\t%0,%1,-%<umask>2"
2643 [(set_attr "type" "fx3")])
2644
2645
2646 (define_insn "rotma_<mode>"
2647 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2648 (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2649 (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))]
2650 ""
2651 "@
2652 rotma<bh>\t%0,%1,%2
2653 rotma<bh>i\t%0,%1,-%<nmask>2"
2654 [(set_attr "type" "fx3")])
2655
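;; Arithmetic DImode shifts split into three cases: a constant count of 63
;; or more just smears the sign across the register (ashrsi + fsm); a
;; constant count of 32 or more shifts the quadword and sign extends the
;; word with xswd; otherwise sign bytes are merged in with fsm/selb so the
;; quadword rotate pulls copies of the sign in from the left.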
2656 (define_insn_and_split "ashrdi3"
2657 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
2658 (ashiftrt:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
2659 (match_operand:SI 2 "spu_nonmem_operand" "r,I")))
2660 (clobber (match_scratch:TI 3 "=&r,&r"))
2661 (clobber (match_scratch:TI 4 "=&r,&r"))
2662 (clobber (match_scratch:SI 5 "=&r,&r"))]
2663 ""
2664 "#"
2665 "reload_completed"
2666 [(set (match_dup:DI 0)
2667 (ashiftrt:DI (match_dup:DI 1)
2668 (match_dup:SI 2)))]
2669 {
2670 rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0]));
2671 rtx op0v = gen_rtx_REG (V4SImode, REGNO (op0));
2672 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
2673 rtx op1s = gen_rtx_REG (SImode, REGNO (op1));
2674 rtx op2 = operands[2];
2675 rtx op3 = operands[3];
2676 rtx op4 = operands[4];
2677 rtx op5 = operands[5];
2678
2679 if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 63)
2680 {
2681 rtx op0s = gen_rtx_REG (SImode, REGNO (op0));
2682 emit_insn (gen_ashrsi3 (op0s, op1s, GEN_INT (32)));
2683 emit_insn (gen_spu_fsm (op0v, op0s));
2684 }
2685 else if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 32)
2686 {
2687 rtx op0d = gen_rtx_REG (V2DImode, REGNO (op0));
2688 HOST_WIDE_INT val = INTVAL (op2);
2689 emit_insn (gen_lshrti3 (op0, op1, GEN_INT (32)));
2690 emit_insn (gen_spu_xswd (op0d, op0v));
2691 if (val > 32)
2692 emit_insn (gen_vashrv4si3 (op0v, op0v, spu_const (V4SImode, val - 32)));
2693 }
2694 else
2695 {
2696 rtx op3v = gen_rtx_REG (V4SImode, REGNO (op3));
2697 unsigned char arr[16] = {
2698 0xff, 0xff, 0xff, 0xff,
2699 0xff, 0xff, 0xff, 0xff,
2700 0x00, 0x00, 0x00, 0x00,
2701 0x00, 0x00, 0x00, 0x00
2702 };
2703
2704 emit_insn (gen_ashrsi3 (op5, op1s, GEN_INT (31)));
2705 emit_move_insn (op4, array_to_constant (TImode, arr));
2706 emit_insn (gen_spu_fsm (op3v, op5));
2707
2708 if (GET_CODE (operands[2]) == REG)
2709 {
2710 emit_insn (gen_selb (op4, op3, op1, op4));
2711 emit_insn (gen_negsi2 (op5, op2));
2712 emit_insn (gen_rotqbybi_ti (op0, op4, op5));
2713 emit_insn (gen_rotqbi_ti (op0, op0, op5));
2714 }
2715 else
2716 {
2717 HOST_WIDE_INT val = -INTVAL (op2);
2718 emit_insn (gen_selb (op0, op3, op1, op4));
2719 if ((val - 7) / 8)
2720 emit_insn (gen_rotqby_ti (op0, op0, GEN_INT ((val - 7) / 8)));
2721 if (val % 8)
2722 emit_insn (gen_rotqbi_ti (op0, op0, GEN_INT (val % 8)));
2723 }
2724 }
2725 DONE;
2726 })
2727
2728
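;; Arithmetic TImode shift: a logical shift right ORed with a sign mask,
;; where the mask is the sign replicated across the register (vashr + fsm)
;; and then shifted left by 128 - count.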
2729 (define_insn_and_split "ashrti3"
2730 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2731 (ashiftrt:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2732 (match_operand:SI 2 "spu_nonmem_operand" "r,i")))]
2733 ""
2734 "#"
2735 ""
2736 [(set (match_dup:TI 0)
2737 (ashiftrt:TI (match_dup:TI 1)
2738 (match_dup:SI 2)))]
2739 {
2740 rtx sign_shift = gen_reg_rtx (SImode);
2741 rtx sign_mask = gen_reg_rtx (TImode);
2742 rtx sign_mask_v4si = gen_rtx_SUBREG (V4SImode, sign_mask, 0);
2743 rtx op1_v4si = spu_gen_subreg (V4SImode, operands[1]);
2744 rtx t = gen_reg_rtx (TImode);
2745 emit_insn (gen_subsi3 (sign_shift, GEN_INT (128), force_reg (SImode, operands[2])));
2746 emit_insn (gen_vashrv4si3 (sign_mask_v4si, op1_v4si, spu_const (V4SImode, 31)));
2747 emit_insn (gen_fsm_ti (sign_mask, sign_mask));
2748 emit_insn (gen_ashlti3 (sign_mask, sign_mask, sign_shift));
2749 emit_insn (gen_lshrti3 (t, operands[1], operands[2]));
2750 emit_insn (gen_iorti3 (operands[0], t, sign_mask));
2751 DONE;
2752 })
2753
2754 ;; fsm is used after rotma to replicate the sign across the whole register.
2755 (define_insn "fsm_ti"
2756 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2757 (unspec:TI [(match_operand:TI 1 "spu_reg_operand" "r")] UNSPEC_FSM))]
2758 ""
2759 "fsm\t%0,%1"
2760 [(set_attr "type" "shuf")])
2761
2762 \f
2763 ;; vrotl, rotl
2764
2765 (define_insn "<v>rotl<mode>3"
2766 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2767 (rotate:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2768 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
2769 ""
2770 "@
2771 rot<bh>\t%0,%1,%2
2772 rot<bh>i\t%0,%1,%<umask>2"
2773 [(set_attr "type" "fx3")])
2774
2775 (define_insn "rotlti3"
2776 [(set (match_operand:TI 0 "spu_reg_operand" "=&r,r,r,r")
2777 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r,r,r")
2778 (match_operand:SI 2 "spu_nonmem_operand" "r,O,P,I")))]
2779 ""
2780 "@
2781 rotqbybi\t%0,%1,%2\;rotqbi\t%0,%0,%2
2782 rotqbyi\t%0,%1,%h2
2783 rotqbii\t%0,%1,%e2
2784 rotqbyi\t%0,%1,%h2\;rotqbii\t%0,%0,%e2"
2785 [(set_attr "length" "8,4,4,8")
2786 (set_attr "type" "multi1,shuf,shuf,multi1")])
2787
2788 (define_insn "rotqbybi_ti"
2789 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2790 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2791 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2792 (const_int -8))))]
2793 ""
2794 "@
2795 rotqbybi\t%0,%1,%2
2796 rotqbyi\t%0,%1,%h2"
2797 [(set_attr "type" "shuf,shuf")])
2798
2799 (define_insn "rotqby_ti"
2800 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2801 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2802 (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2803 (const_int 8))))]
2804 ""
2805 "@
2806 rotqby\t%0,%1,%2
2807 rotqbyi\t%0,%1,%f2"
2808 [(set_attr "type" "shuf,shuf")])
2809
2810 (define_insn "rotqbi_ti"
2811 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2812 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2813 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2814 (const_int 7))))]
2815 ""
2816 "@
2817 rotqbi\t%0,%1,%2
2818 rotqbii\t%0,%1,%e2"
2819 [(set_attr "type" "shuf,shuf")])
2820
2821 \f
2822 ;; struct extract/insert
2823 ;; We handle mems here because GCC would otherwise generate invalid
2824 ;; SUBREGs and inefficient code.
2825
2826 (define_expand "extv"
2827 [(set (match_operand:TI 0 "register_operand" "")
2828 (sign_extract:TI (match_operand 1 "nonimmediate_operand" "")
2829 (match_operand:SI 2 "const_int_operand" "")
2830 (match_operand:SI 3 "const_int_operand" "")))]
2831 ""
2832 {
2833 spu_expand_extv (operands, 0);
2834 DONE;
2835 })
2836
2837 (define_expand "extzv"
2838 [(set (match_operand:TI 0 "register_operand" "")
2839 (zero_extract:TI (match_operand 1 "nonimmediate_operand" "")
2840 (match_operand:SI 2 "const_int_operand" "")
2841 (match_operand:SI 3 "const_int_operand" "")))]
2842 ""
2843 {
2844 spu_expand_extv (operands, 1);
2845 DONE;
2846 })
2847
2848 (define_expand "insv"
2849 [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
2850 (match_operand:SI 1 "const_int_operand" "")
2851 (match_operand:SI 2 "const_int_operand" ""))
2852 (match_operand 3 "nonmemory_operand" ""))]
2853 ""
2854 {
2855 if (INTVAL (operands[1]) + INTVAL (operands[2])
2856 > GET_MODE_BITSIZE (GET_MODE (operands[0])))
2857 FAIL;
2858 spu_expand_insv(operands);
2859 DONE;
2860 })
2861
2862 ;; Simplify a number of patterns that get generated by extv, extzv,
2863 ;; insv, and loads.
2864 (define_insn_and_split "trunc_shr_ti<mode>"
2865 [(set (match_operand:QHSI 0 "spu_reg_operand" "=r")
2866 (truncate:QHSI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "0")
2867 (const_int 96)])))]
2868 ""
2869 "#"
2870 "reload_completed"
2871 [(const_int 0)]
2872 {
2873 spu_split_convert (operands);
2874 DONE;
2875 }
2876 [(set_attr "type" "convert")
2877 (set_attr "length" "0")])
2878
2879 (define_insn_and_split "trunc_shr_tidi"
2880 [(set (match_operand:DI 0 "spu_reg_operand" "=r")
2881 (truncate:DI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "0")
2882 (const_int 64)])))]
2883 ""
2884 "#"
2885 "reload_completed"
2886 [(const_int 0)]
2887 {
2888 spu_split_convert (operands);
2889 DONE;
2890 }
2891 [(set_attr "type" "convert")
2892 (set_attr "length" "0")])
2893
2894 (define_insn_and_split "shl_ext_<mode>ti"
2895 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2896 (ashift:TI (match_operator:TI 2 "extend_operator" [(match_operand:QHSI 1 "spu_reg_operand" "0")])
2897 (const_int 96)))]
2898 ""
2899 "#"
2900 "reload_completed"
2901 [(const_int 0)]
2902 {
2903 spu_split_convert (operands);
2904 DONE;
2905 }
2906 [(set_attr "type" "convert")
2907 (set_attr "length" "0")])
2908
2909 (define_insn_and_split "shl_ext_diti"
2910 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2911 (ashift:TI (match_operator:TI 2 "extend_operator" [(match_operand:DI 1 "spu_reg_operand" "0")])
2912 (const_int 64)))]
2913 ""
2914 "#"
2915 "reload_completed"
2916 [(const_int 0)]
2917 {
2918 spu_split_convert (operands);
2919 DONE;
2920 }
2921 [(set_attr "type" "convert")
2922 (set_attr "length" "0")])
2923
2924 (define_insn "sext_trunc_lshr_tiqisi"
2925 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2926 (sign_extend:SI (truncate:QI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
2927 (const_int 120)]))))]
2928 ""
2929 "rotmai\t%0,%1,-24"
2930 [(set_attr "type" "fx3")])
2931
2932 (define_insn "zext_trunc_lshr_tiqisi"
2933 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2934 (zero_extend:SI (truncate:QI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
2935 (const_int 120)]))))]
2936 ""
2937 "rotmi\t%0,%1,-24"
2938 [(set_attr "type" "fx3")])
2939
2940 (define_insn "sext_trunc_lshr_tihisi"
2941 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2942 (sign_extend:SI (truncate:HI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
2943 (const_int 112)]))))]
2944 ""
2945 "rotmai\t%0,%1,-16"
2946 [(set_attr "type" "fx3")])
2947
2948 (define_insn "zext_trunc_lshr_tihisi"
2949 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2950 (zero_extend:SI (truncate:HI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
2951 (const_int 112)]))))]
2952 ""
2953 "rotmi\t%0,%1,-16"
2954 [(set_attr "type" "fx3")])
2955
2956 \f
2957 ;; String/block move insn.
2958 ;; Argument 0 is the destination
2959 ;; Argument 1 is the source
2960 ;; Argument 2 is the length
2961 ;; Argument 3 is the alignment
2962
2963 (define_expand "movstrsi"
2964 [(parallel [(set (match_operand:BLK 0 "" "")
2965 (match_operand:BLK 1 "" ""))
2966 (use (match_operand:SI 2 "" ""))
2967 (use (match_operand:SI 3 "" ""))])]
2968 ""
2969 "
2970 {
2971 if (spu_expand_block_move (operands))
2972 DONE;
2973 else
2974 FAIL;
2975 }")
2976
2977 \f
2978 ;; jump
2979
2980 (define_insn "indirect_jump"
2981 [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))]
2982 ""
2983 "bi\t%0"
2984 [(set_attr "type" "br")])
2985
2986 (define_insn "jump"
2987 [(set (pc)
2988 (label_ref (match_operand 0 "" "")))]
2989 ""
2990 "br\t%0"
2991 [(set_attr "type" "br")])
2992
2993 \f
2994 ;; return
2995
2996 ;; This is used for leaf functions that don't save any regs and don't
2997 ;; have locals on the stack; that is, for functions that don't change
2998 ;; $sp and don't need to save $lr.
2999 (define_expand "return"
3000 [(return)]
3001 "direct_return()"
3002 "")
3003
3004 ;; Used in spu_expand_epilogue to generate a return from a function and
3005 ;; to explicitly mark the use of $lr.
3006
3007 (define_insn "_return"
3008 [(return)]
3009 ""
3010 "bi\t$lr"
3011 [(set_attr "type" "br")])
3012
3013
3014 \f
3015 ;; ceq
3016
3017 (define_insn "ceq_<mode>"
3018 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
3019 (eq:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
3020 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
3021 ""
3022 "@
3023 ceq<bh>\t%0,%1,%2
3024 ceq<bh>i\t%0,%1,%2")
3025
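;; DImode equality: ceq compares all four words, gb packs the per-word
;; results into a 4-bit value with word 0 in the most significant bit, and
;; the cgt against 11 accepts exactly the values with both high bits set,
;; i.e. the two words holding the DI value compared equal.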
3026 (define_insn_and_split "ceq_di"
3027 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3028 (eq:SI (match_operand:DI 1 "spu_reg_operand" "r")
3029 (match_operand:DI 2 "spu_reg_operand" "r")))]
3030 ""
3031 "#"
3032 "reload_completed"
3033 [(set (match_dup:SI 0)
3034 (eq:SI (match_dup:DI 1)
3035 (match_dup:DI 2)))]
3036 {
3037 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
3038 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
3039 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
3040 emit_insn (gen_ceq_v4si (op0, op1, op2));
3041 emit_insn (gen_spu_gb (op0, op0));
3042 emit_insn (gen_cgt_si (operands[0], operands[0], GEN_INT (11)));
3043 DONE;
3044 })
3045
3046
3047 ;; We provide the TI compares for completeness and because some parts of
3048 ;; gcc/libgcc use them, even though user code might never see them.
3049 (define_insn "ceq_ti"
3050 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3051 (eq:SI (match_operand:TI 1 "spu_reg_operand" "r")
3052 (match_operand:TI 2 "spu_reg_operand" "r")))]
3053 ""
3054 "ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15"
3055 [(set_attr "type" "multi0")
3056 (set_attr "length" "12")])
3057
3058 (define_insn "ceq_<mode>"
3059 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
3060 (eq:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
3061 (match_operand:VSF 2 "spu_reg_operand" "r")))]
3062 ""
3063 "fceq\t%0,%1,%2")
3064
3065 (define_insn "cmeq_<mode>"
3066 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
3067 (eq:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
3068 (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
3069 ""
3070 "fcmeq\t%0,%1,%2")
3071
3072 ;; These implementations skip the NaN and INF checks when compiled
3073 ;; with -ffinite-math-only.
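;; On PROCESSOR_CELL there is no hardware double-precision compare, so the
;; expanders below emulate it with V4SI operations: full bitwise equality
;; of the two words of each double, a both-operands-zero test so that +0.0
;; and -0.0 compare equal, and (unless finite math is assumed) a NaN test
;; that forces the result to false.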
3074 (define_expand "ceq_df"
3075 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3076 (eq:SI (match_operand:DF 1 "spu_reg_operand" "r")
3077 (match_operand:DF 2 "const_zero_operand" "i")))]
3078 ""
3079 {
3080 if (spu_arch == PROCESSOR_CELL)
3081 {
3082 rtx ra = gen_reg_rtx (V4SImode);
3083 rtx rb = gen_reg_rtx (V4SImode);
3084 rtx temp = gen_reg_rtx (TImode);
3085 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3086 rtx temp2 = gen_reg_rtx (V4SImode);
3087 rtx biteq = gen_reg_rtx (V4SImode);
3088 rtx ahi_inf = gen_reg_rtx (V4SImode);
3089 rtx a_nan = gen_reg_rtx (V4SImode);
3090 rtx a_abs = gen_reg_rtx (V4SImode);
3091 rtx b_abs = gen_reg_rtx (V4SImode);
3092 rtx iszero = gen_reg_rtx (V4SImode);
3093 rtx sign_mask = gen_reg_rtx (V4SImode);
3094 rtx nan_mask = gen_reg_rtx (V4SImode);
3095 rtx hihi_promote = gen_reg_rtx (TImode);
3096 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3097 0x7FFFFFFF, 0xFFFFFFFF);
3098
3099 emit_move_insn (sign_mask, pat);
3100 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3101 0x7FF00000, 0x0);
3102 emit_move_insn (nan_mask, pat);
3103 pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
3104 0x08090A0B, 0x18191A1B);
3105 emit_move_insn (hihi_promote, pat);
3106
3107 emit_insn (gen_spu_convert (ra, operands[1]));
3108 emit_insn (gen_spu_convert (rb, operands[2]));
3109 emit_insn (gen_ceq_v4si (biteq, ra, rb));
3110 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
3111 GEN_INT (4 * 8)));
3112 emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
3113
3114 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3115 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3116 if (!flag_finite_math_only)
3117 {
3118 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3119 emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
3120 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3121 GEN_INT (4 * 8)));
3122 emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
3123 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3124 }
3125 emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
3126 emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
3127 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
3128 GEN_INT (4 * 8)));
3129 emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
3130 emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
3131 if (!flag_finite_math_only)
3132 {
3133 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
3134 }
3135 emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
3136 DONE;
3137 }
3138 })
3139
3140 (define_insn "ceq_<mode>_celledp"
3141 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
3142 (eq:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
3143 (match_operand:VDF 2 "spu_reg_operand" "r")))]
3144 "spu_arch == PROCESSOR_CELLEDP"
3145 "dfceq\t%0,%1,%2"
3146 [(set_attr "type" "fpd")])
3147
3148 (define_insn "cmeq_<mode>_celledp"
3149 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
3150 (eq:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
3151 (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
3152 "spu_arch == PROCESSOR_CELLEDP"
3153 "dfcmeq\t%0,%1,%2"
3154 [(set_attr "type" "fpd")])
3155
3156 (define_expand "ceq_v2df"
3157 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3158 (eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
3159 (match_operand:V2DF 2 "spu_reg_operand" "r")))]
3160 ""
3161 {
3162 if (spu_arch == PROCESSOR_CELL)
3163 {
3164 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3165 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
3166 rtx temp = gen_reg_rtx (TImode);
3167 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3168 rtx temp2 = gen_reg_rtx (V4SImode);
3169 rtx biteq = gen_reg_rtx (V4SImode);
3170 rtx ahi_inf = gen_reg_rtx (V4SImode);
3171 rtx a_nan = gen_reg_rtx (V4SImode);
3172 rtx a_abs = gen_reg_rtx (V4SImode);
3173 rtx b_abs = gen_reg_rtx (V4SImode);
3174 rtx iszero = gen_reg_rtx (V4SImode);
3175 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3176 0x7FFFFFFF, 0xFFFFFFFF);
3177 rtx sign_mask = gen_reg_rtx (V4SImode);
3178 rtx nan_mask = gen_reg_rtx (V4SImode);
3179 rtx hihi_promote = gen_reg_rtx (TImode);
3180
3181 emit_move_insn (sign_mask, pat);
3182 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3183 0x7FF00000, 0x0);
3184 emit_move_insn (nan_mask, pat);
3185 pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
3186 0x08090A0B, 0x18191A1B);
3187 emit_move_insn (hihi_promote, pat);
3188
3189 emit_insn (gen_ceq_v4si (biteq, ra, rb));
3190 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
3191 GEN_INT (4 * 8)));
3192 emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
3193 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3194 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3195 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3196 emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
3197 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3198 GEN_INT (4 * 8)));
3199 emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
3200 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3201 emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
3202 emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
3203 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
3204 GEN_INT (4 * 8)));
3205 emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
3206 emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
3207 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
3208 emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
3209 DONE;
3210 }
3211 })
3212
3213 (define_expand "cmeq_v2df"
3214 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3215 (eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
3216 (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
3217 ""
3218 {
3219 if (spu_arch == PROCESSOR_CELL)
3220 {
3221 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3222 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
3223 rtx temp = gen_reg_rtx (TImode);
3224 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3225 rtx temp2 = gen_reg_rtx (V4SImode);
3226 rtx biteq = gen_reg_rtx (V4SImode);
3227 rtx ahi_inf = gen_reg_rtx (V4SImode);
3228 rtx a_nan = gen_reg_rtx (V4SImode);
3229 rtx a_abs = gen_reg_rtx (V4SImode);
3230 rtx b_abs = gen_reg_rtx (V4SImode);
3231
3232 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3233 0x7FFFFFFF, 0xFFFFFFFF);
3234 rtx sign_mask = gen_reg_rtx (V4SImode);
3235 rtx nan_mask = gen_reg_rtx (V4SImode);
3236 rtx hihi_promote = gen_reg_rtx (TImode);
3237
3238 emit_move_insn (sign_mask, pat);
3239
3240 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3241 0x7FF00000, 0x0);
3242 emit_move_insn (nan_mask, pat);
3243 pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
3244 0x08090A0B, 0x18191A1B);
3245 emit_move_insn (hihi_promote, pat);
3246
3247 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3248 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3249 emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs));
3250 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
3251 GEN_INT (4 * 8)));
3252 emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
3253 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3254 emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
3255 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3256 GEN_INT (4 * 8)));
3257 emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
3258 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3259 emit_insn (gen_andc_v4si (temp2, biteq, a_nan));
3260 emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
3261 DONE;
3262 }
3263 })
3264
3265 \f
3266 ;; cgt
3267
3268 (define_insn "cgt_<mode>"
3269 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
3270 (gt:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
3271 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
3272 ""
3273 "@
3274 cgt<bh>\t%0,%1,%2
3275 cgt<bh>i\t%0,%1,%2")
3276
3277 (define_insn "cgt_di_m1"
3278 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3279 (gt:SI (match_operand:DI 1 "spu_reg_operand" "r")
3280 (const_int -1)))]
3281 ""
3282 "cgti\t%0,%1,-1")
3283
3284 (define_insn_and_split "cgt_di"
3285 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3286 (gt:SI (match_operand:DI 1 "spu_reg_operand" "r")
3287 (match_operand:DI 2 "spu_reg_operand" "r")))
3288 (clobber (match_scratch:V4SI 3 "=&r"))
3289 (clobber (match_scratch:V4SI 4 "=&r"))
3290 (clobber (match_scratch:V4SI 5 "=&r"))]
3291 ""
3292 "#"
3293 "reload_completed"
3294 [(set (match_dup:SI 0)
3295 (gt:SI (match_dup:DI 1)
3296 (match_dup:DI 2)))]
3297 {
3298 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
3299 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
3300 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
3301 rtx op3 = operands[3];
3302 rtx op4 = operands[4];
3303 rtx op5 = operands[5];
3304 rtx op3d = gen_rtx_REG (V2DImode, REGNO (operands[3]));
3305 emit_insn (gen_clgt_v4si (op3, op1, op2));
3306 emit_insn (gen_ceq_v4si (op4, op1, op2));
3307 emit_insn (gen_cgt_v4si (op5, op1, op2));
3308 emit_insn (gen_spu_xswd (op3d, op3));
3309 emit_insn (gen_selb (op0, op5, op3, op4));
3310 DONE;
3311 })
3312
3313 (define_insn "cgt_ti_m1"
3314 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3315 (gt:SI (match_operand:TI 1 "spu_reg_operand" "r")
3316 (const_int -1)))]
3317 ""
3318 "cgti\t%0,%1,-1")
3319
3320 (define_insn "cgt_ti"
3321 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3322 (gt:SI (match_operand:TI 1 "spu_reg_operand" "r")
3323 (match_operand:TI 2 "spu_reg_operand" "r")))
3324 (clobber (match_scratch:V4SI 3 "=&r"))
3325 (clobber (match_scratch:V4SI 4 "=&r"))
3326 (clobber (match_scratch:V4SI 5 "=&r"))]
3327 ""
3328 "clgt\t%4,%1,%2\;\
3329 ceq\t%3,%1,%2\;\
3330 cgt\t%5,%1,%2\;\
3331 shlqbyi\t%0,%4,4\;\
3332 selb\t%0,%4,%0,%3\;\
3333 shlqbyi\t%0,%0,4\;\
3334 selb\t%0,%4,%0,%3\;\
3335 shlqbyi\t%0,%0,4\;\
3336 selb\t%0,%5,%0,%3"
3337 [(set_attr "type" "multi0")
3338 (set_attr "length" "36")])
3339
3340 (define_insn "cgt_<mode>"
3341 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
3342 (gt:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
3343 (match_operand:VSF 2 "spu_reg_operand" "r")))]
3344 ""
3345 "fcgt\t%0,%1,%2")
3346
3347 (define_insn "cmgt_<mode>"
3348 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
3349 (gt:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
3350 (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
3351 ""
3352 "fcmgt\t%0,%1,%2")
3353
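;; For the ordered compare the Cell fallback maps each double onto a
;; 64-bit integer with the same ordering: take the absolute value and, for
;; negative inputs, negate it as a two-word integer (bg generates the
;; borrow, shufb moves it into place, sfx finishes the subtract); a
;; combined cgt/clgt/ceq of the word pairs then gives the double
;; greater-than result.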
3354 (define_expand "cgt_df"
3355 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3356 (gt:SI (match_operand:DF 1 "spu_reg_operand" "r")
3357 (match_operand:DF 2 "const_zero_operand" "i")))]
3358 ""
3359 {
3360 if (spu_arch == PROCESSOR_CELL)
3361 {
3362 rtx ra = gen_reg_rtx (V4SImode);
3363 rtx rb = gen_reg_rtx (V4SImode);
3364 rtx zero = gen_reg_rtx (V4SImode);
3365 rtx temp = gen_reg_rtx (TImode);
3366 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3367 rtx temp2 = gen_reg_rtx (V4SImode);
3368 rtx hi_inf = gen_reg_rtx (V4SImode);
3369 rtx a_nan = gen_reg_rtx (V4SImode);
3370 rtx b_nan = gen_reg_rtx (V4SImode);
3371 rtx a_abs = gen_reg_rtx (V4SImode);
3372 rtx b_abs = gen_reg_rtx (V4SImode);
3373 rtx asel = gen_reg_rtx (V4SImode);
3374 rtx bsel = gen_reg_rtx (V4SImode);
3375 rtx abor = gen_reg_rtx (V4SImode);
3376 rtx bbor = gen_reg_rtx (V4SImode);
3377 rtx gt_hi = gen_reg_rtx (V4SImode);
3378 rtx gt_lo = gen_reg_rtx (V4SImode);
3379 rtx sign_mask = gen_reg_rtx (V4SImode);
3380 rtx nan_mask = gen_reg_rtx (V4SImode);
3381 rtx hi_promote = gen_reg_rtx (TImode);
3382 rtx borrow_shuffle = gen_reg_rtx (TImode);
3383
3384 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3385 0x7FFFFFFF, 0xFFFFFFFF);
3386 emit_move_insn (sign_mask, pat);
3387 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3388 0x7FF00000, 0x0);
3389 emit_move_insn (nan_mask, pat);
3390 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
3391 0x08090A0B, 0x08090A0B);
3392 emit_move_insn (hi_promote, pat);
3393 pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
3394 0x0C0D0E0F, 0xC0C0C0C0);
3395 emit_move_insn (borrow_shuffle, pat);
3396
3397 emit_insn (gen_spu_convert (ra, operands[1]));
3398 emit_insn (gen_spu_convert (rb, operands[2]));
3399 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3400 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3401
3402 if (!flag_finite_math_only)
3403 {
3404 /* check if ra is NaN */
3405 emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
3406 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3407 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3408 GEN_INT (4 * 8)));
3409 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3410 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3411 emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
3412
3413 /* check if rb is NaN */
3414 emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
3415 emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
3416 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
3417 GEN_INT (4 * 8)));
3418 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3419 emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
3420 emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
3421
3422 /* check if ra or rb is NaN */
3423 emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
3424 }
3425 emit_move_insn (zero, CONST0_RTX (V4SImode));
3426 emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
3427 emit_insn (gen_shufb (asel, asel, asel, hi_promote));
3428 emit_insn (gen_bg_v4si (abor, zero, a_abs));
3429 emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
3430 emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
3431 emit_insn (gen_selb (abor, a_abs, abor, asel));
3432
3433 emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
3434 emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
3435 emit_insn (gen_bg_v4si (bbor, zero, b_abs));
3436 emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
3437 emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
3438 emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
3439
3440 emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
3441 emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
3442 emit_insn (gen_ceq_v4si (temp2, abor, bbor));
3443 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
3444 GEN_INT (4 * 8)));
3445 emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
3446 emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
3447 emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
3448 if (!flag_finite_math_only)
3449 {
3450 /* correct for NaNs */
3451 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
3452 }
3453 emit_insn (gen_spu_convert (operands[0], temp2));
3454 DONE;
3455 }
3456 })
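;; A rough sketch of the Cell-only expansion above (illustrative only,
;; derived from the emitted sequence rather than from any SPU document):
;; each double is handled as a sign-magnitude integer, its magnitude is
;; negated when the sign bit is set (the bg/sfx/selb group), and then
;;   gt = (a_hi > b_hi) || (a_hi == b_hi && a_lo >unsigned b_lo);
;;   if (!flag_finite_math_only) gt &= !isnan (a) && !isnan (b);
;; with hi_promote splatting the per-element result to both words.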
3457
3458 (define_insn "cgt_<mode>_celledp"
3459 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
3460 (gt:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
3461 (match_operand:VDF 2 "spu_reg_operand" "r")))]
3462 "spu_arch == PROCESSOR_CELLEDP"
3463 "dfcgt\t%0,%1,%2"
3464 [(set_attr "type" "fpd")])
3465
3466 (define_insn "cmgt_<mode>_celledp"
3467 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
3468 (gt:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
3469 (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
3470 "spu_arch == PROCESSOR_CELLEDP"
3471 "dfcmgt\t%0,%1,%2"
3472 [(set_attr "type" "fpd")])
3473
3474 (define_expand "cgt_v2df"
3475 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3476 (gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
3477 (match_operand:V2DF 2 "spu_reg_operand" "r")))]
3478 ""
3479 {
3480 if (spu_arch == PROCESSOR_CELL)
3481 {
3482 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3483 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
3484 rtx zero = gen_reg_rtx (V4SImode);
3485 rtx temp = gen_reg_rtx (TImode);
3486 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3487 rtx temp2 = gen_reg_rtx (V4SImode);
3488 rtx hi_inf = gen_reg_rtx (V4SImode);
3489 rtx a_nan = gen_reg_rtx (V4SImode);
3490 rtx b_nan = gen_reg_rtx (V4SImode);
3491 rtx a_abs = gen_reg_rtx (V4SImode);
3492 rtx b_abs = gen_reg_rtx (V4SImode);
3493 rtx asel = gen_reg_rtx (V4SImode);
3494 rtx bsel = gen_reg_rtx (V4SImode);
3495 rtx abor = gen_reg_rtx (V4SImode);
3496 rtx bbor = gen_reg_rtx (V4SImode);
3497 rtx gt_hi = gen_reg_rtx (V4SImode);
3498 rtx gt_lo = gen_reg_rtx (V4SImode);
3499 rtx sign_mask = gen_reg_rtx (V4SImode);
3500 rtx nan_mask = gen_reg_rtx (V4SImode);
3501 rtx hi_promote = gen_reg_rtx (TImode);
3502 rtx borrow_shuffle = gen_reg_rtx (TImode);
3503 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3504 0x7FFFFFFF, 0xFFFFFFFF);
3505 emit_move_insn (sign_mask, pat);
3506 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3507 0x7FF00000, 0x0);
3508 emit_move_insn (nan_mask, pat);
3509 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
3510 0x08090A0B, 0x08090A0B);
3511 emit_move_insn (hi_promote, pat);
3512 pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
3513 0x0C0D0E0F, 0xC0C0C0C0);
3514 emit_move_insn (borrow_shuffle, pat);
3515
3516 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3517 emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
3518 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3519 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3520 GEN_INT (4 * 8)));
3521 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3522 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3523 emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
3524 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3525 emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
3526 emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
3527 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
3528 GEN_INT (4 * 8)));
3529 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3530 emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
3531 emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
3532 emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
3533 emit_move_insn (zero, CONST0_RTX (V4SImode));
3534 emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
3535 emit_insn (gen_shufb (asel, asel, asel, hi_promote));
3536 emit_insn (gen_bg_v4si (abor, zero, a_abs));
3537 emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
3538 emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
3539 emit_insn (gen_selb (abor, a_abs, abor, asel));
3540 emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
3541 emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
3542 emit_insn (gen_bg_v4si (bbor, zero, b_abs));
3543 emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
3544 emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
3545 emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
3546 emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
3547 emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
3548 emit_insn (gen_ceq_v4si (temp2, abor, bbor));
3549 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
3550 GEN_INT (4 * 8)));
3551 emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
3552 emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
3553
3554 emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
3555 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
3556 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
3557 DONE;
3558 }
3559 })
3560
3561 (define_expand "cmgt_v2df"
3562 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3563 (gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
3564 (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
3565 ""
3566 {
3567 if (spu_arch == PROCESSOR_CELL)
3568 {
3569 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3570 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
3571 rtx temp = gen_reg_rtx (TImode);
3572 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3573 rtx temp2 = gen_reg_rtx (V4SImode);
3574 rtx hi_inf = gen_reg_rtx (V4SImode);
3575 rtx a_nan = gen_reg_rtx (V4SImode);
3576 rtx b_nan = gen_reg_rtx (V4SImode);
3577 rtx a_abs = gen_reg_rtx (V4SImode);
3578 rtx b_abs = gen_reg_rtx (V4SImode);
3579 rtx gt_hi = gen_reg_rtx (V4SImode);
3580 rtx gt_lo = gen_reg_rtx (V4SImode);
3581 rtx sign_mask = gen_reg_rtx (V4SImode);
3582 rtx nan_mask = gen_reg_rtx (V4SImode);
3583 rtx hi_promote = gen_reg_rtx (TImode);
3584 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3585 0x7FFFFFFF, 0xFFFFFFFF);
3586 emit_move_insn (sign_mask, pat);
3587 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3588 0x7FF00000, 0x0);
3589 emit_move_insn (nan_mask, pat);
3590 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
3591 0x08090A0B, 0x08090A0B);
3592 emit_move_insn (hi_promote, pat);
3593
3594 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3595 emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
3596 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3597 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3598 GEN_INT (4 * 8)));
3599 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3600 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3601 emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
3602 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3603 emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
3604 emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
3605 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
3606 GEN_INT (4 * 8)));
3607 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3608 emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
3609 emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
3610 emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
3611
3612 emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs));
3613 emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs));
3614 emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs));
3615 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
3616 GEN_INT (4 * 8)));
3617 emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
3618 emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
3619 emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
3620 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
3621 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
3622 DONE;
3623 }
3624 })
3625
3626 \f
3627 ;; clgt
3628
3629 (define_insn "clgt_<mode>"
3630 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
3631 (gtu:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
3632 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
3633 ""
3634 "@
3635 clgt<bh>\t%0,%1,%2
3636 clgt<bh>i\t%0,%1,%2")
3637
3638 (define_insn_and_split "clgt_di"
3639 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3640 (gtu:SI (match_operand:DI 1 "spu_reg_operand" "r")
3641 (match_operand:DI 2 "spu_reg_operand" "r")))
3642 (clobber (match_scratch:V4SI 3 "=&r"))
3643 (clobber (match_scratch:V4SI 4 "=&r"))
3644 (clobber (match_scratch:V4SI 5 "=&r"))]
3645 ""
3646 "#"
3647 "reload_completed"
3648 [(set (match_dup:SI 0)
3649 (gtu:SI (match_dup:DI 1)
3650 (match_dup:DI 2)))]
3651 {
3652 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
3653 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
3654 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
3655 rtx op3 = operands[3];
3656 rtx op4 = operands[4];
3657 rtx op5 = operands[5];
3658 rtx op5d = gen_rtx_REG (V2DImode, REGNO (operands[5]));
3659 emit_insn (gen_clgt_v4si (op3, op1, op2));
3660 emit_insn (gen_ceq_v4si (op4, op1, op2));
3661 emit_insn (gen_spu_xswd (op5d, op3));
3662 emit_insn (gen_selb (op0, op3, op5, op4));
3663 DONE;
3664 })
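;; The split above uses the usual word-wise identity for a 64-bit unsigned
;; compare (sketch):
;;   (a >u b) = (a_hi >u b_hi) | ((a_hi == b_hi) & (a_lo >u b_lo))
;; clgt/ceq give per-word masks, xswd propagates the low-word result into
;; the high-word slot, and selb keeps it only where the high words match.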
3665
3666 (define_insn "clgt_ti"
3667 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3668 (gtu:SI (match_operand:TI 1 "spu_reg_operand" "r")
3669 (match_operand:TI 2 "spu_reg_operand" "r")))
3670 (clobber (match_scratch:V4SI 3 "=&r"))
3671 (clobber (match_scratch:V4SI 4 "=&r"))]
3672 ""
3673 "ceq\t%3,%1,%2\;\
3674 clgt\t%4,%1,%2\;\
3675 shlqbyi\t%0,%4,4\;\
3676 selb\t%0,%4,%0,%3\;\
3677 shlqbyi\t%0,%0,4\;\
3678 selb\t%0,%4,%0,%3\;\
3679 shlqbyi\t%0,%0,4\;\
3680 selb\t%0,%4,%0,%3"
3681 [(set_attr "type" "multi0")
3682 (set_attr "length" "32")])
3683
3684 \f
3685 ;; dftsv
3686 (define_insn "dftsv_celledp"
3687 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3688 (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r")
3689 (match_operand:SI 2 "const_int_operand" "i")]
3690 UNSPEC_DFTSV))]
3691 "spu_arch == PROCESSOR_CELLEDP"
3692 "dftsv\t%0,%1,%2"
3693 [(set_attr "type" "fpd")])
3694
3695 (define_expand "dftsv"
3696 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3697 (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r")
3698 (match_operand:SI 2 "const_int_operand" "i")]
3699 UNSPEC_DFTSV))]
3700 ""
3701 {
3702 if (spu_arch == PROCESSOR_CELL)
3703 {
3704 rtx result = gen_reg_rtx (V4SImode);
3705 emit_move_insn (result, CONST0_RTX (V4SImode));
3706
3707 if (INTVAL (operands[2]))
3708 {
3709 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3710 rtx abs = gen_reg_rtx (V4SImode);
3711 rtx sign = gen_reg_rtx (V4SImode);
3712 rtx temp = gen_reg_rtx (TImode);
3713 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3714 rtx temp2 = gen_reg_rtx (V4SImode);
3715 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3716 0x7FFFFFFF, 0xFFFFFFFF);
3717 rtx sign_mask = gen_reg_rtx (V4SImode);
3718 rtx hi_promote = gen_reg_rtx (TImode);
3719 emit_move_insn (sign_mask, pat);
3720 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
3721 0x08090A0B, 0x08090A0B);
3722 emit_move_insn (hi_promote, pat);
3723
3724 emit_insn (gen_vashrv4si3 (sign, ra, spu_const (V4SImode, 31)));
3725 emit_insn (gen_shufb (sign, sign, sign, hi_promote));
3726 emit_insn (gen_andv4si3 (abs, ra, sign_mask));
3727
3728 /* NaN or +inf or -inf */
3729 if (INTVAL (operands[2]) & 0x70)
3730 {
3731 rtx nan_mask = gen_reg_rtx (V4SImode);
3732 rtx isinf = gen_reg_rtx (V4SImode);
3733 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3734 0x7FF00000, 0x0);
3735 emit_move_insn (nan_mask, pat);
3736 emit_insn (gen_ceq_v4si (isinf, abs, nan_mask));
3737
3738 /* NaN */
3739 if (INTVAL (operands[2]) & 0x40)
3740 {
3741 rtx isnan = gen_reg_rtx (V4SImode);
3742 emit_insn (gen_clgt_v4si (isnan, abs, nan_mask));
3743 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan),
3744 GEN_INT (4 * 8)));
3745 emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf));
3746 emit_insn (gen_iorv4si3 (isnan, isnan, temp2));
3747 emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote));
3748 emit_insn (gen_iorv4si3 (result, result, isnan));
3749 }
3750 /* +inf or -inf */
3751 if (INTVAL (operands[2]) & 0x30)
3752 {
3753 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf),
3754 GEN_INT (4 * 8)));
3755 emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si));
3756 emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote));
3757
3758 /* +inf */
3759 if (INTVAL (operands[2]) & 0x20)
3760 {
3761 emit_insn (gen_andc_v4si (temp2, isinf, sign));
3762 emit_insn (gen_iorv4si3 (result, result, temp2));
3763 }
3764 /* -inf */
3765 if (INTVAL (operands[2]) & 0x10)
3766 {
3767 emit_insn (gen_andv4si3 (temp2, isinf, sign));
3768 emit_insn (gen_iorv4si3 (result, result, temp2));
3769 }
3770 }
3771 }
3772
3773 /* 0 or denorm */
3774 if (INTVAL (operands[2]) & 0xF)
3775 {
3776 rtx iszero = gen_reg_rtx (V4SImode);
3777 emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode)));
3778 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
3779 GEN_INT (4 * 8)));
3780 emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
3781
3782 /* denorm */
3783 if (INTVAL (operands[2]) & 0x3)
3784 {
3785 rtx isdenorm = gen_reg_rtx (V4SImode);
3786 rtx denorm_mask = gen_reg_rtx (V4SImode);
3787 emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF));
3788 emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask));
3789 emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero));
3790 emit_insn (gen_shufb (isdenorm, isdenorm,
3791 isdenorm, hi_promote));
3792 /* +denorm */
3793 if (INTVAL (operands[2]) & 0x2)
3794 {
3795 emit_insn (gen_andc_v4si (temp2, isdenorm, sign));
3796 emit_insn (gen_iorv4si3 (result, result, temp2));
3797 }
3798 /* -denorm */
3799 if (INTVAL (operands[2]) & 0x1)
3800 {
3801 emit_insn (gen_andv4si3 (temp2, isdenorm, sign));
3802 emit_insn (gen_iorv4si3 (result, result, temp2));
3803 }
3804 }
3805
3806 /* 0 */
3807 if (INTVAL (operands[2]) & 0xC)
3808 {
3809 emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote));
3810 /* +0 */
3811 if (INTVAL (operands[2]) & 0x8)
3812 {
3813 emit_insn (gen_andc_v4si (temp2, iszero, sign));
3814 emit_insn (gen_iorv4si3 (result, result, temp2));
3815 }
3816 /* -0 */
3817 if (INTVAL (operands[2]) & 0x4)
3818 {
3819 emit_insn (gen_andv4si3 (temp2, iszero, sign));
3820 emit_insn (gen_iorv4si3 (result, result, temp2));
3821 }
3822 }
3823 }
3824 }
3825 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result));
3826 DONE;
3827 }
3828 })
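;; For reference, the operand 2 bits tested by the Cell expansion above
;; (celledp simply passes the mask through to the dftsv instruction):
;;   0x40 NaN     0x20 +inf     0x10 -inf     0x08 +0
;;   0x04 -0      0x02 +denorm  0x01 -denorm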
3829
3830
3831 ;; branches
3832
3833 (define_insn ""
3834 [(set (pc)
3835 (if_then_else (match_operator 1 "branch_comparison_operator"
3836 [(match_operand 2
3837 "spu_reg_operand" "r")
3838 (const_int 0)])
3839 (label_ref (match_operand 0 "" ""))
3840 (pc)))]
3841 ""
3842 "br%b2%b1z\t%2,%0"
3843 [(set_attr "type" "br")])
3844
3845 (define_insn ""
3846 [(set (pc)
3847 (if_then_else (match_operator 0 "branch_comparison_operator"
3848 [(match_operand 1
3849 "spu_reg_operand" "r")
3850 (const_int 0)])
3851 (return)
3852 (pc)))]
3853 "direct_return ()"
3854 "bi%b1%b0z\t%1,$lr"
3855 [(set_attr "type" "br")])
3856
3857 (define_insn ""
3858 [(set (pc)
3859 (if_then_else (match_operator 1 "branch_comparison_operator"
3860 [(match_operand 2
3861 "spu_reg_operand" "r")
3862 (const_int 0)])
3863 (pc)
3864 (label_ref (match_operand 0 "" ""))))]
3865 ""
3866 "br%b2%b1z\t%2,%0"
3867 [(set_attr "type" "br")])
3868
3869 (define_insn ""
3870 [(set (pc)
3871 (if_then_else (match_operator 0 "branch_comparison_operator"
3872 [(match_operand 1
3873 "spu_reg_operand" "r")
3874 (const_int 0)])
3875 (pc)
3876 (return)))]
3877 "direct_return ()"
3878 "bi%b1%b0z\t%1,$lr"
3879 [(set_attr "type" "br")])
3880
3881 \f
3882 ;; vector conditional compare patterns
3883 (define_expand "vcond<mode><mode>"
3884 [(set (match_operand:VCMP 0 "spu_reg_operand" "=r")
3885 (if_then_else:VCMP
3886 (match_operator 3 "comparison_operator"
3887 [(match_operand:VCMP 4 "spu_reg_operand" "r")
3888 (match_operand:VCMP 5 "spu_reg_operand" "r")])
3889 (match_operand:VCMP 1 "spu_reg_operand" "r")
3890 (match_operand:VCMP 2 "spu_reg_operand" "r")))]
3891 ""
3892 {
3893 if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
3894 operands[3], operands[4], operands[5]))
3895 DONE;
3896 else
3897 FAIL;
3898 })
3899
3900 (define_expand "vcondu<mode><mode>"
3901 [(set (match_operand:VCMPU 0 "spu_reg_operand" "=r")
3902 (if_then_else:VCMPU
3903 (match_operator 3 "comparison_operator"
3904 [(match_operand:VCMPU 4 "spu_reg_operand" "r")
3905 (match_operand:VCMPU 5 "spu_reg_operand" "r")])
3906 (match_operand:VCMPU 1 "spu_reg_operand" "r")
3907 (match_operand:VCMPU 2 "spu_reg_operand" "r")))]
3908 ""
3909 {
3910 if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
3911 operands[3], operands[4], operands[5]))
3912 DONE;
3913 else
3914 FAIL;
3915 })
3916
3917 \f
3918 ;; branch on condition
3919
3920 (define_expand "cbranch<mode>4"
3921 [(use (match_operator 0 "ordered_comparison_operator"
3922 [(match_operand:VQHSI 1 "spu_reg_operand" "")
3923 (match_operand:VQHSI 2 "spu_nonmem_operand" "")]))
3924 (use (match_operand 3 ""))]
3925 ""
3926 { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
3927
3928 (define_expand "cbranch<mode>4"
3929 [(use (match_operator 0 "ordered_comparison_operator"
3930 [(match_operand:DTI 1 "spu_reg_operand" "")
3931 (match_operand:DTI 2 "spu_reg_operand" "")]))
3932 (use (match_operand 3 ""))]
3933 ""
3934 { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
3935
3936 (define_expand "cbranch<mode>4"
3937 [(use (match_operator 0 "ordered_comparison_operator"
3938 [(match_operand:VSF 1 "spu_reg_operand" "")
3939 (match_operand:VSF 2 "spu_reg_operand" "")]))
3940 (use (match_operand 3 ""))]
3941 ""
3942 { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
3943
3944 (define_expand "cbranchdf4"
3945 [(use (match_operator 0 "ordered_comparison_operator"
3946 [(match_operand:DF 1 "spu_reg_operand" "")
3947 (match_operand:DF 2 "spu_reg_operand" "")]))
3948 (use (match_operand 3 ""))]
3949 ""
3950 { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
3951
3952 \f
3953 ;; set on condition
3954
3955 (define_expand "cstore<mode>4"
3956 [(use (match_operator 1 "ordered_comparison_operator"
3957 [(match_operand:VQHSI 2 "spu_reg_operand" "")
3958 (match_operand:VQHSI 3 "spu_nonmem_operand" "")]))
3959 (clobber (match_operand:SI 0 "spu_reg_operand"))]
3960 ""
3961 { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
3962
3963 (define_expand "cstore<mode>4"
3964 [(use (match_operator 1 "ordered_comparison_operator"
3965 [(match_operand:DTI 2 "spu_reg_operand" "")
3966 (match_operand:DTI 3 "spu_reg_operand" "")]))
3967 (clobber (match_operand:SI 0 "spu_reg_operand"))]
3968 ""
3969 { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
3970
3971 (define_expand "cstore<mode>4"
3972 [(use (match_operator 1 "ordered_comparison_operator"
3973 [(match_operand:VSF 2 "spu_reg_operand" "")
3974 (match_operand:VSF 3 "spu_reg_operand" "")]))
3975 (clobber (match_operand:SI 0 "spu_reg_operand"))]
3976 ""
3977 { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
3978
3979 (define_expand "cstoredf4"
3980 [(use (match_operator 1 "ordered_comparison_operator"
3981 [(match_operand:DF 2 "spu_reg_operand" "")
3982 (match_operand:DF 3 "spu_reg_operand" "")]))
3983 (clobber (match_operand:SI 0 "spu_reg_operand"))]
3984 ""
3985 { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
3986
3987 \f
3988 ;; conditional move
3989
3990 ;; Define this first one so HAVE_conditional_move is defined.
3991 (define_insn "movcc_dummy"
3992 [(set (match_operand 0 "" "")
3993 (if_then_else (match_operand 1 "" "")
3994 (match_operand 2 "" "")
3995 (match_operand 3 "" "")))]
3996 "!operands[0]"
3997 "")
3998
3999 (define_expand "mov<mode>cc"
4000 [(set (match_operand:ALL 0 "spu_reg_operand" "")
4001 (if_then_else:ALL (match_operand 1 "ordered_comparison_operator" "")
4002 (match_operand:ALL 2 "spu_reg_operand" "")
4003 (match_operand:ALL 3 "spu_reg_operand" "")))]
4004 ""
4005 {
4006 spu_emit_branch_or_set(2, operands[1], operands);
4007 DONE;
4008 })
4009
4010 ;; This pattern is used when the result of a compare is not large
4011 ;; enough to use in a selb when expanding conditional moves.
4012 (define_expand "extend_compare"
4013 [(set (match_operand 0 "spu_reg_operand" "=r")
4014 (unspec [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))]
4015 ""
4016 {
4017 emit_insn (gen_rtx_SET (operands[0],
4018 gen_rtx_UNSPEC (GET_MODE (operands[0]),
4019 gen_rtvec (1, operands[1]),
4020 UNSPEC_EXTEND_CMP)));
4021 DONE;
4022 })
4023
4024 (define_insn "extend_compare<mode>"
4025 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
4026 (unspec:ALL [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))]
4027 "operands != NULL"
4028 "fsm\t%0,%1"
4029 [(set_attr "type" "shuf")])
4030
4031 \f
4032 ;; case
4033
4034 ;; operand 0 is index
4035 ;; operand 1 is the minimum bound
4036 ;; operand 2 is the maximum bound - minimum bound + 1
4037 ;; operand 3 is the CODE_LABEL for the table;
4038 ;; operand 4 is the CODE_LABEL to go to if the index is out of range.
4039 (define_expand "casesi"
4040 [(match_operand:SI 0 "spu_reg_operand" "")
4041 (match_operand:SI 1 "immediate_operand" "")
4042 (match_operand:SI 2 "immediate_operand" "")
4043 (match_operand 3 "" "")
4044 (match_operand 4 "" "")]
4045 ""
4046 {
4047 rtx table = gen_reg_rtx (SImode);
4048 rtx index = gen_reg_rtx (SImode);
4049 rtx sindex = gen_reg_rtx (SImode);
4050 rtx addr = gen_reg_rtx (Pmode);
4051
4052 emit_move_insn (table, gen_rtx_LABEL_REF (SImode, operands[3]));
4053
4054 emit_insn (gen_subsi3(index, operands[0], force_reg(SImode, operands[1])));
4055 emit_insn (gen_ashlsi3(sindex, index, GEN_INT (2)));
4056 emit_move_insn (addr, gen_rtx_MEM (SImode,
4057 gen_rtx_PLUS (SImode, table, sindex)));
4058 if (flag_pic)
4059 emit_insn (gen_addsi3 (addr, addr, table));
4060
4061 emit_cmp_and_jump_insns (index, operands[2], GTU, NULL_RTX, SImode, 1, operands[4]);
4062 emit_jump_insn (gen_tablejump (addr, operands[3]));
4063 DONE;
4064 })
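;; Roughly, the sequence emitted above computes (illustrative C only):
;;   unsigned idx = index - min;          /* operands[0] - operands[1] */
;;   addr = table[idx];                   /* SImode entries, idx * 4   */
;;   if (flag_pic) addr += table;         /* PIC tables hold offsets   */
;;   if (idx > range) goto default_label; /* operands[2], operands[4]  */
;;   goto *addr;                          /* tablejump                 */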
4065
4066 (define_insn "tablejump"
4067 [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))
4068 (use (label_ref (match_operand 1 "" "")))]
4069 ""
4070 "bi\t%0"
4071 [(set_attr "type" "br")])
4072
4073 \f
4074 ;; call
4075
4076 ;; Note that operand 1 is total size of args, in bytes,
4077 ;; and what the call insn wants is the number of words.
4078 (define_expand "sibcall"
4079 [(parallel
4080 [(call (match_operand:QI 0 "call_operand" "")
4081 (match_operand:QI 1 "" ""))
4082 (use (reg:SI 0))])]
4083 ""
4084 {
4085 if (! call_operand (operands[0], QImode))
4086 XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
4087 })
4088
4089 (define_insn "_sibcall"
4090 [(parallel
4091 [(call (match_operand:QI 0 "call_operand" "R,S")
4092 (match_operand:QI 1 "" "i,i"))
4093 (use (reg:SI 0))])]
4094 "SIBLING_CALL_P(insn)"
4095 "@
4096 bi\t%i0
4097 br\t%0"
4098 [(set_attr "type" "br,br")])
4099
4100 (define_expand "sibcall_value"
4101 [(parallel
4102 [(set (match_operand 0 "" "")
4103 (call (match_operand:QI 1 "call_operand" "")
4104 (match_operand:QI 2 "" "")))
4105 (use (reg:SI 0))])]
4106 ""
4107 {
4108 if (! call_operand (operands[1], QImode))
4109 XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
4110 })
4111
4112 (define_insn "_sibcall_value"
4113 [(parallel
4114 [(set (match_operand 0 "" "")
4115 (call (match_operand:QI 1 "call_operand" "R,S")
4116 (match_operand:QI 2 "" "i,i")))
4117 (use (reg:SI 0))])]
4118 "SIBLING_CALL_P(insn)"
4119 "@
4120 bi\t%i1
4121 br\t%1"
4122 [(set_attr "type" "br,br")])
4123
4124 ;; Note that operand 1 is total size of args, in bytes,
4125 ;; and what the call insn wants is the number of words.
4126 (define_expand "call"
4127 [(parallel
4128 [(call (match_operand:QI 0 "call_operand" "")
4129 (match_operand:QI 1 "" ""))
4130 (clobber (reg:SI 0))
4131 (clobber (reg:SI 130))])]
4132 ""
4133 {
4134 if (! call_operand (operands[0], QImode))
4135 XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
4136 })
4137
4138 (define_insn "_call"
4139 [(parallel
4140 [(call (match_operand:QI 0 "call_operand" "R,S,T")
4141 (match_operand:QI 1 "" "i,i,i"))
4142 (clobber (reg:SI 0))
4143 (clobber (reg:SI 130))])]
4144 ""
4145 "@
4146 bisl\t$lr,%i0
4147 brsl\t$lr,%0
4148 brasl\t$lr,%0"
4149 [(set_attr "type" "br")])
4150
4151 (define_expand "call_value"
4152 [(parallel
4153 [(set (match_operand 0 "" "")
4154 (call (match_operand:QI 1 "call_operand" "")
4155 (match_operand:QI 2 "" "")))
4156 (clobber (reg:SI 0))
4157 (clobber (reg:SI 130))])]
4158 ""
4159 {
4160 if (! call_operand (operands[1], QImode))
4161 XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
4162 })
4163
4164 (define_insn "_call_value"
4165 [(parallel
4166 [(set (match_operand 0 "" "")
4167 (call (match_operand:QI 1 "call_operand" "R,S,T")
4168 (match_operand:QI 2 "" "i,i,i")))
4169 (clobber (reg:SI 0))
4170 (clobber (reg:SI 130))])]
4171 ""
4172 "@
4173 bisl\t$lr,%i1
4174 brsl\t$lr,%1
4175 brasl\t$lr,%1"
4176 [(set_attr "type" "br")])
4177
4178 (define_expand "untyped_call"
4179 [(parallel [(call (match_operand 0 "" "")
4180 (const_int 0))
4181 (match_operand 1 "" "")
4182 (match_operand 2 "" "")])]
4183 ""
4184 {
4185 int i;
4186 rtx reg = gen_rtx_REG (TImode, 3);
4187
4188 /* We need to use call_value so the return value registers don't get
4189 * clobbered. */
4190 emit_call_insn (gen_call_value (reg, operands[0], const0_rtx));
4191
4192 for (i = 0; i < XVECLEN (operands[2], 0); i++)
4193 {
4194 rtx set = XVECEXP (operands[2], 0, i);
4195 emit_move_insn (SET_DEST (set), SET_SRC (set));
4196 }
4197
4198 /* The optimizer does not know that the call sets the function value
4199 registers we stored in the result block. We avoid problems by
4200 claiming that all hard registers are used and clobbered at this
4201 point. */
4202 emit_insn (gen_blockage ());
4203
4204 DONE;
4205 })
4206
4207 \f
4208 ;; Patterns used for splitting and combining.
4209
4210 \f
4211 ;; Function prologue and epilogue.
4212
4213 (define_expand "prologue"
4214 [(const_int 1)]
4215 ""
4216 { spu_expand_prologue (); DONE; })
4217
4218 ;; "blockage" is only emitted in epilogue. This is what it took to
4219 ;; make "basic block reordering" work with the insns sequence
4220 ;; generated by the spu_expand_epilogue (taken from mips.md)
4221
4222 (define_insn "blockage"
4223 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
4224 ""
4225 ""
4226 [(set_attr "type" "convert")
4227 (set_attr "length" "0")])
4228
4229 (define_expand "epilogue"
4230 [(const_int 2)]
4231 ""
4232 { spu_expand_epilogue (false); DONE; })
4233
4234 (define_expand "sibcall_epilogue"
4235 [(const_int 2)]
4236 ""
4237 { spu_expand_epilogue (true); DONE; })
4238
4239 \f
4240 ;; stack manipulations
4241
4242 ;; An insn to allocate new stack space for dynamic use (e.g., alloca).
4243 ;; We move the back-chain and decrement the stack pointer.
4244 (define_expand "allocate_stack"
4245 [(set (match_operand 0 "spu_reg_operand" "")
4246 (minus (reg 1) (match_operand 1 "spu_nonmem_operand" "")))
4247 (set (reg 1)
4248 (minus (reg 1) (match_dup 1)))]
4249 ""
4250 "spu_allocate_stack (operands[0], operands[1]); DONE;")
4251
4252 ;; These patterns say how to save and restore the stack pointer. We need not
4253 ;; save the stack pointer at function level since we are careful to preserve
4254 ;; the backchain.
4255 ;;
4256
4257 ;; At block level the stack pointer is saved and restored, so that the
4258 ;; stack space allocated within a block is deallocated when leaving
4259 ;; block scope. By default, according to the SPU ABI, the stack
4260 ;; pointer and available stack size are saved in a register. Upon
4261 ;; restoration, the stack pointer is simply copied back, and the
4262 ;; current available stack size is calculated against the restored
4263 ;; stack pointer.
4264 ;;
4265 ;; For nonlocal gotos, we must save the stack pointer and its
4266 ;; backchain and restore both. Note that in the nonlocal case, the
4267 ;; save area is a memory location.
4268
4269 (define_expand "save_stack_function"
4270 [(match_operand 0 "general_operand" "")
4271 (match_operand 1 "general_operand" "")]
4272 ""
4273 "DONE;")
4274
4275 (define_expand "restore_stack_function"
4276 [(match_operand 0 "general_operand" "")
4277 (match_operand 1 "general_operand" "")]
4278 ""
4279 "DONE;")
4280
4281 (define_expand "restore_stack_block"
4282 [(match_operand 0 "spu_reg_operand" "")
4283 (match_operand 1 "memory_operand" "")]
4284 ""
4285 "
4286 {
4287 spu_restore_stack_block (operands[0], operands[1]);
4288 DONE;
4289 }")
4290
4291 (define_expand "save_stack_nonlocal"
4292 [(match_operand 0 "memory_operand" "")
4293 (match_operand 1 "spu_reg_operand" "")]
4294 ""
4295 "
4296 {
4297 rtx temp = gen_reg_rtx (Pmode);
4298
4299 /* Copy the backchain to the first word, sp to the second. We need to
4300 save the back chain because __builtin_apply appears to clobber it. */
4301 emit_move_insn (temp, gen_rtx_MEM (Pmode, operands[1]));
4302 emit_move_insn (adjust_address_nv (operands[0], SImode, 0), temp);
4303 emit_move_insn (adjust_address_nv (operands[0], SImode, 4), operands[1]);
4304 DONE;
4305 }")
4306
4307 (define_expand "restore_stack_nonlocal"
4308 [(match_operand 0 "spu_reg_operand" "")
4309 (match_operand 1 "memory_operand" "")]
4310 ""
4311 "
4312 {
4313 spu_restore_stack_nonlocal(operands[0], operands[1]);
4314 DONE;
4315 }")
4316
4317 \f
4318 ;; vector patterns
4319
4320 ;; Vector initialization
4321 (define_expand "vec_init<mode>"
4322 [(match_operand:V 0 "register_operand" "")
4323 (match_operand 1 "" "")]
4324 ""
4325 {
4326 spu_expand_vector_init (operands[0], operands[1]);
4327 DONE;
4328 })
4329
4330 (define_expand "vec_set<mode>"
4331 [(use (match_operand:SI 2 "spu_nonmem_operand" ""))
4332 (set (match_dup:TI 3)
4333 (unspec:TI [(match_dup:SI 4)
4334 (match_dup:SI 5)
4335 (match_dup:SI 6)] UNSPEC_CPAT))
4336 (set (match_operand:V 0 "spu_reg_operand" "")
4337 (unspec:V [(match_operand:<inner> 1 "spu_reg_operand" "")
4338 (match_dup:V 0)
4339 (match_dup:TI 3)] UNSPEC_SHUFB))]
4340 ""
4341 {
4342 HOST_WIDE_INT size = GET_MODE_SIZE (<inner>mode);
4343 rtx offset = GEN_INT (INTVAL (operands[2]) * size);
4344 operands[3] = gen_reg_rtx (TImode);
4345 operands[4] = stack_pointer_rtx;
4346 operands[5] = offset;
4347 operands[6] = GEN_INT (size);
4348 })
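;; Sketch: the UNSPEC_CPAT above builds an insertion control word (in the
;; spirit of the cwd/chd/cdd instructions) for an <inner>mode element at
;; byte offset elt * size, and the shufb then splices operand 1 into
;; operand 0 at that position.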
4349
4350 (define_expand "vec_extract<mode>"
4351 [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
4352 (vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
4353 (parallel [(match_operand 2 "const_int_operand" "i")])))]
4354 ""
4355 {
4356 if ((INTVAL (operands[2]) * <vmult> + <voff>) % 16 == 0)
4357 {
4358 emit_insn (gen_spu_convert (operands[0], operands[1]));
4359 DONE;
4360 }
4361 })
4362
4363 (define_insn "_vec_extract<mode>"
4364 [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
4365 (vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
4366 (parallel [(match_operand 2 "const_int_operand" "i")])))]
4367 ""
4368 "rotqbyi\t%0,%1,(%2*<vmult>+<voff>)%%16"
4369 [(set_attr "type" "shuf")])
4370
4371 (define_insn "_vec_extractv8hi_ze"
4372 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
4373 (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "spu_reg_operand" "r")
4374 (parallel [(const_int 0)]))))]
4375 ""
4376 "rotqmbyi\t%0,%1,-2"
4377 [(set_attr "type" "shuf")])
4378
4379 \f
4380 ;; misc
4381
4382 (define_expand "shufb"
4383 [(set (match_operand 0 "spu_reg_operand" "")
4384 (unspec [(match_operand 1 "spu_reg_operand" "")
4385 (match_operand 2 "spu_reg_operand" "")
4386 (match_operand:TI 3 "spu_reg_operand" "")] UNSPEC_SHUFB))]
4387 ""
4388 {
4389 rtx s = gen__shufb (operands[0], operands[1], operands[2], operands[3]);
4390 PUT_MODE (SET_SRC (s), GET_MODE (operands[0]));
4391 emit_insn (s);
4392 DONE;
4393 })
4394
4395 (define_insn "_shufb"
4396 [(set (match_operand 0 "spu_reg_operand" "=r")
4397 (unspec [(match_operand 1 "spu_reg_operand" "r")
4398 (match_operand 2 "spu_reg_operand" "r")
4399 (match_operand:TI 3 "spu_reg_operand" "r")] UNSPEC_SHUFB))]
4400 "operands != NULL"
4401 "shufb\t%0,%1,%2,%3"
4402 [(set_attr "type" "shuf")])
4403
4404 ; The semantics of vec_permv16qi are nearly identical to those of the SPU
4405 ; shufb instruction, except that we need to reduce the selector modulo 32.
4406 (define_expand "vec_permv16qi"
4407 [(set (match_dup 4) (and:V16QI (match_operand:V16QI 3 "spu_reg_operand" "")
4408 (match_dup 6)))
4409 (set (match_operand:V16QI 0 "spu_reg_operand" "")
4410 (unspec:V16QI
4411 [(match_operand:V16QI 1 "spu_reg_operand" "")
4412 (match_operand:V16QI 2 "spu_reg_operand" "")
4413 (match_dup 5)]
4414 UNSPEC_SHUFB))]
4415 ""
4416 {
4417 operands[4] = gen_reg_rtx (V16QImode);
4418 operands[5] = gen_lowpart (TImode, operands[4]);
4419 operands[6] = spu_const (V16QImode, 31);
4420 })
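;; Sketch of the resulting semantics (illustrative index math only):
;;   for (i = 0; i < 16; i++)
;;     {
;;       int s = sel[i] & 31;              /* the AND with 31 above     */
;;       d[i] = s < 16 ? a[s] : b[s - 16]; /* shufb on the masked bytes */
;;     }
;; Masking first keeps selector bytes out of shufb's special-value range.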
4421
4422 (define_insn "nop"
4423 [(unspec_volatile [(const_int 0)] UNSPECV_NOP)]
4424 ""
4425 "nop"
4426 [(set_attr "type" "nop")])
4427
4428 (define_insn "nopn"
4429 [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "K")] UNSPECV_NOP)]
4430 ""
4431 "nop\t%0"
4432 [(set_attr "type" "nop")])
4433
4434 (define_insn "lnop"
4435 [(unspec_volatile [(const_int 0)] UNSPECV_LNOP)]
4436 ""
4437 "lnop"
4438 [(set_attr "type" "lnop")])
4439
4440 ;; The operand records why we generated this hbrp.
4441 ;; We clobber mem to make sure it isn't moved over any
4442 ;; loads, stores or calls while scheduling.
4443 (define_insn "iprefetch"
4444 [(unspec [(match_operand:SI 0 "const_int_operand" "n")] UNSPEC_IPREFETCH)
4445 (clobber (mem:BLK (scratch)))]
4446 ""
4447 "hbrp\t# %0"
4448 [(set_attr "type" "iprefetch")])
4449
4450 ;; A non-volatile version so it gets scheduled
4451 (define_insn "nopn_nv"
4452 [(unspec [(match_operand:SI 0 "register_operand" "r")] UNSPEC_NOP)]
4453 ""
4454 "nop\t%0"
4455 [(set_attr "type" "nop")])
4456
4457 (define_insn "hbr"
4458 [(set (reg:SI 130)
4459 (unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i,i")
4460 (match_operand:SI 1 "nonmemory_operand" "r,s,i")] UNSPEC_HBR))
4461 (unspec [(const_int 0)] UNSPEC_HBR)]
4462 ""
4463 "@
4464 hbr\t%0,%1
4465 hbrr\t%0,%1
4466 hbra\t%0,%1"
4467 [(set_attr "type" "hbr")])
4468
4469 (define_insn "sync"
4470 [(unspec_volatile [(const_int 0)] UNSPECV_SYNC)
4471 (clobber (mem:BLK (scratch)))]
4472 ""
4473 "sync"
4474 [(set_attr "type" "br")])
4475
4476 (define_insn "syncc"
4477 [(unspec_volatile [(const_int 1)] UNSPECV_SYNC)
4478 (clobber (mem:BLK (scratch)))]
4479 ""
4480 "syncc"
4481 [(set_attr "type" "br")])
4482
4483 (define_insn "dsync"
4484 [(unspec_volatile [(const_int 2)] UNSPECV_SYNC)
4485 (clobber (mem:BLK (scratch)))]
4486 ""
4487 "dsync"
4488 [(set_attr "type" "br")])
4489
4490 \f
4491
4492 ;; Define the subtract-one-and-jump insns so loop.c
4493 ;; knows what to generate.
4494 (define_expand "doloop_end"
4495 [(use (match_operand 0 "" "")) ; loop pseudo
4496 (use (match_operand 1 "" ""))] ; label
4497 ""
4498 "
4499 {
4500 /* Currently SMS relies on the do-loop pattern to recognize loops
4501 where (1) the control part consists of all insns defining and/or
4502 using a certain 'count' register and (2) the loop count can be
4503 adjusted by modifying this register prior to the loop.
4504 ??? The possible introduction of a new block to initialize the
4505 new IV can potentially affect branch optimizations. */
4506 if (optimize > 0 && flag_modulo_sched)
4507 {
4508 rtx s0;
4509 rtx bcomp;
4510 rtx loc_ref;
4511
4512 if (GET_MODE (operands[0]) != SImode)
4513 FAIL;
4514
4515 s0 = operands [0];
4516 emit_move_insn (s0, gen_rtx_PLUS (SImode, s0, GEN_INT (-1)));
4517 bcomp = gen_rtx_NE(SImode, s0, const0_rtx);
4518 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [1]);
4519 emit_jump_insn (gen_rtx_SET (pc_rtx,
4520 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4521 loc_ref, pc_rtx)));
4522
4523 DONE;
4524 } else
4525 FAIL;
4526 }")
4527
4528 ;; convert between any two modes, avoiding any GCC assumptions
4529 (define_expand "spu_convert"
4530 [(set (match_operand 0 "spu_reg_operand" "")
4531 (unspec [(match_operand 1 "spu_reg_operand" "")] UNSPEC_CONVERT))]
4532 ""
4533 {
4534 rtx c = gen__spu_convert (operands[0], operands[1]);
4535 PUT_MODE (SET_SRC (c), GET_MODE (operands[0]));
4536 emit_insn (c);
4537 DONE;
4538 })
4539
4540 (define_insn_and_split "_spu_convert"
4541 [(set (match_operand 0 "spu_reg_operand" "=r")
4542 (unspec [(match_operand 1 "spu_reg_operand" "0")] UNSPEC_CONVERT))]
4543 ""
4544 "#"
4545 "reload_completed"
4546 [(const_int 0)]
4547 {
4548 spu_split_convert (operands);
4549 DONE;
4550 }
4551 [(set_attr "type" "convert")
4552 (set_attr "length" "0")])
4553
4554 \f
4555 ;;
4556 (include "spu-builtins.md")
4557
4558
4559 (define_expand "smaxv4sf3"
4560 [(set (match_operand:V4SF 0 "register_operand" "=r")
4561 (smax:V4SF (match_operand:V4SF 1 "register_operand" "r")
4562 (match_operand:V4SF 2 "register_operand" "r")))]
4563 ""
4564 "
4565 {
4566 rtx mask = gen_reg_rtx (V4SImode);
4567
4568 emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2]));
4569 emit_insn (gen_selb (operands[0], operands[2], operands[1], mask));
4570 DONE;
4571 }")
4572
4573 (define_expand "sminv4sf3"
4574 [(set (match_operand:V4SF 0 "register_operand" "=r")
4575 (smin:V4SF (match_operand:V4SF 1 "register_operand" "r")
4576 (match_operand:V4SF 2 "register_operand" "r")))]
4577 ""
4578 "
4579 {
4580 rtx mask = gen_reg_rtx (V4SImode);
4581
4582 emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2]));
4583 emit_insn (gen_selb (operands[0], operands[1], operands[2], mask));
4584 DONE;
4585 }")
4586
4587 (define_expand "smaxv2df3"
4588 [(set (match_operand:V2DF 0 "register_operand" "=r")
4589 (smax:V2DF (match_operand:V2DF 1 "register_operand" "r")
4590 (match_operand:V2DF 2 "register_operand" "r")))]
4591 ""
4592 "
4593 {
4594 rtx mask = gen_reg_rtx (V2DImode);
4595 emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
4596 emit_insn (gen_selb (operands[0], operands[2], operands[1],
4597 spu_gen_subreg (V4SImode, mask)));
4598 DONE;
4599 }")
4600
4601 (define_expand "sminv2df3"
4602 [(set (match_operand:V2DF 0 "register_operand" "=r")
4603 (smin:V2DF (match_operand:V2DF 1 "register_operand" "r")
4604 (match_operand:V2DF 2 "register_operand" "r")))]
4605 ""
4606 "
4607 {
4608 rtx mask = gen_reg_rtx (V2DImode);
4609 emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
4610 emit_insn (gen_selb (operands[0], operands[1], operands[2],
4611 spu_gen_subreg (V4SImode, mask)));
4612 DONE;
4613 }")
4614
4615 (define_insn "vec_widen_smult_odd_v8hi"
4616 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r")
4617 (mult:V4SI
4618 (sign_extend:V4SI
4619 (vec_select:V4HI
4620 (match_operand:V8HI 1 "spu_reg_operand" "r,r")
4621 (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
4622 (sign_extend:V4SI
4623 (vec_select:V4HI
4624 (match_operand:V8HI 2 "spu_arith_operand" "r,B")
4625 (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))]
4626 ""
4627 "@
4628 mpy\t%0,%1,%2
4629 mpyi\t%0,%1,%2"
4630 [(set_attr "type" "fp7")])
4631
4632 (define_insn "vec_widen_umult_odd_v8hi"
4633 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r")
4634 (mult:V4SI
4635 (zero_extend:V4SI
4636 (vec_select:V4HI
4637 (match_operand:V8HI 1 "spu_reg_operand" "r,r")
4638 (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
4639 (zero_extend:V4SI
4640 (vec_select:V4HI
4641 (match_operand:V8HI 2 "spu_arith_operand" "r,B")
4642 (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))]
4643 ""
4644 "@
4645 mpyu\t%0,%1,%2
4646 mpyui\t%0,%1,%2"
4647 [(set_attr "type" "fp7")])
4648
4649 (define_insn "vec_widen_smult_even_v8hi"
4650 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4651 (mult:V4SI
4652 (sign_extend:V4SI
4653 (vec_select:V4HI
4654 (match_operand:V8HI 1 "spu_reg_operand" "r")
4655 (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
4656 (sign_extend:V4SI
4657 (vec_select:V4HI
4658 (match_operand:V8HI 2 "spu_reg_operand" "r")
4659 (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))]
4660 ""
4661 "mpyhh\t%0,%1,%2"
4662 [(set_attr "type" "fp7")])
4663
4664 (define_insn "vec_widen_umult_even_v8hi"
4665 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4666 (mult:V4SI
4667 (zero_extend:V4SI
4668 (vec_select:V4HI
4669 (match_operand:V8HI 1 "spu_reg_operand" "r")
4670 (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
4671 (zero_extend:V4SI
4672 (vec_select:V4HI
4673 (match_operand:V8HI 2 "spu_reg_operand" "r")
4674 (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))]
4675 ""
4676 "mpyhhu\t%0,%1,%2"
4677 [(set_attr "type" "fp7")])
4678
4679 (define_expand "vec_widen_umult_hi_v8hi"
4680 [(set (match_operand:V4SI 0 "register_operand" "=r")
4681 (mult:V4SI
4682 (zero_extend:V4SI
4683 (vec_select:V4HI
4684 (match_operand:V8HI 1 "register_operand" "r")
4685 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
4686 (zero_extend:V4SI
4687 (vec_select:V4HI
4688 (match_operand:V8HI 2 "register_operand" "r")
4689 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))]
4690 ""
4691 "
4692 {
4693 rtx ve = gen_reg_rtx (V4SImode);
4694 rtx vo = gen_reg_rtx (V4SImode);
4695 rtx mask = gen_reg_rtx (TImode);
4696 unsigned char arr[16] = {
4697 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
4698 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
4699
4700 emit_move_insn (mask, array_to_constant (TImode, arr));
4701 emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
4702 emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
4703 emit_insn (gen_shufb (operands[0], ve, vo, mask));
4704 DONE;
4705 }")
4706
4707 (define_expand "vec_widen_umult_lo_v8hi"
4708 [(set (match_operand:V4SI 0 "register_operand" "=r")
4709 (mult:V4SI
4710 (zero_extend:V4SI
4711 (vec_select:V4HI
4712 (match_operand:V8HI 1 "register_operand" "r")
4713 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
4714 (zero_extend:V4SI
4715 (vec_select:V4HI
4716 (match_operand:V8HI 2 "register_operand" "r")
4717 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))]
4718 ""
4719 "
4720 {
4721 rtx ve = gen_reg_rtx (V4SImode);
4722 rtx vo = gen_reg_rtx (V4SImode);
4723 rtx mask = gen_reg_rtx (TImode);
4724 unsigned char arr[16] = {
4725 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
4726 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
4727
4728 emit_move_insn (mask, array_to_constant (TImode, arr));
4729 emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
4730 emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
4731 emit_insn (gen_shufb (operands[0], ve, vo, mask));
4732 DONE;
4733 }")
4734
4735 (define_expand "vec_widen_smult_hi_v8hi"
4736 [(set (match_operand:V4SI 0 "register_operand" "=r")
4737 (mult:V4SI
4738 (sign_extend:V4SI
4739 (vec_select:V4HI
4740 (match_operand:V8HI 1 "register_operand" "r")
4741 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
4742 (sign_extend:V4SI
4743 (vec_select:V4HI
4744 (match_operand:V8HI 2 "register_operand" "r")
4745 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))]
4746 ""
4747 "
4748 {
4749 rtx ve = gen_reg_rtx (V4SImode);
4750 rtx vo = gen_reg_rtx (V4SImode);
4751 rtx mask = gen_reg_rtx (TImode);
4752 unsigned char arr[16] = {
4753 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
4754 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
4755
4756 emit_move_insn (mask, array_to_constant (TImode, arr));
4757 emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
4758 emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
4759 emit_insn (gen_shufb (operands[0], ve, vo, mask));
4760 DONE;
4761 }")
4762
4763 (define_expand "vec_widen_smult_lo_v8hi"
4764 [(set (match_operand:V4SI 0 "register_operand" "=r")
4765 (mult:V4SI
4766 (sign_extend:V4SI
4767 (vec_select:V4HI
4768 (match_operand:V8HI 1 "register_operand" "r")
4769 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
4770 (sign_extend:V4SI
4771 (vec_select:V4HI
4772 (match_operand:V8HI 2 "register_operand" "r")
4773 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))]
4774 ""
4775 "
4776 {
4777 rtx ve = gen_reg_rtx (V4SImode);
4778 rtx vo = gen_reg_rtx (V4SImode);
4779 rtx mask = gen_reg_rtx (TImode);
4780 unsigned char arr[16] = {
4781 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
4782 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
4783
4784 emit_move_insn (mask, array_to_constant (TImode, arr));
4785 emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
4786 emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
4787 emit_insn (gen_shufb (operands[0], ve, vo, mask));
4788 DONE;
4789 }")
4790
4791 (define_expand "vec_realign_load_<mode>"
4792 [(set (match_operand:ALL 0 "register_operand" "=r")
4793 (unspec:ALL [(match_operand:ALL 1 "register_operand" "r")
4794 (match_operand:ALL 2 "register_operand" "r")
4795 (match_operand:TI 3 "register_operand" "r")] UNSPEC_SPU_REALIGN_LOAD))]
4796 ""
4797 "
4798 {
4799 emit_insn (gen_shufb (operands[0], operands[1], operands[2], operands[3]));
4800 DONE;
4801 }")
4802
4803 (define_expand "spu_lvsr"
4804 [(set (match_operand:V16QI 0 "register_operand" "")
4805 (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_SPU_MASK_FOR_LOAD))]
4806 ""
4807 "
4808 {
4809 rtx addr;
4810 rtx offset = gen_reg_rtx (V8HImode);
4811 rtx addr_bits = gen_reg_rtx (SImode);
4812 rtx addr_bits_vec = gen_reg_rtx (V8HImode);
4813 rtx splatqi = gen_reg_rtx (TImode);
4814 rtx result = gen_reg_rtx (V8HImode);
4815 unsigned char arr[16] = {
4816 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
4817 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
4818 unsigned char arr2[16] = {
4819 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
4820 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03};
4821
4822 emit_move_insn (offset, array_to_constant (V8HImode, arr));
4823 emit_move_insn (splatqi, array_to_constant (TImode, arr2));
4824
4825 gcc_assert (GET_CODE (operands[1]) == MEM);
4826 addr = force_reg (Pmode, XEXP (operands[1], 0));
4827 emit_insn (gen_andsi3 (addr_bits, addr, GEN_INT (0xF)));
4828 emit_insn (gen_shufb (addr_bits_vec, addr_bits, addr_bits, splatqi));
4829
4830 /* offset - (addr & 0xF)
4831 It is safe to use a single sfh, because each byte of offset is > 15 and
4832 each byte of addr is <= 15. */
4833 emit_insn (gen_subv8hi3 (result, offset, addr_bits_vec));
4834
4835 result = simplify_gen_subreg (V16QImode, result, V8HImode, 0);
4836 emit_move_insn (operands[0], result);
4837
4838 DONE;
4839 }")
4840
4841 (define_expand "vec_unpacku_hi_v8hi"
4842 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4843 (zero_extend:V4SI
4844 (vec_select:V4HI
4845 (match_operand:V8HI 1 "spu_reg_operand" "r")
4846 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
4847 ""
4848 {
4849 rtx mask = gen_reg_rtx (TImode);
4850 unsigned char arr[16] = {
4851 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
4852 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
4853
4854 emit_move_insn (mask, array_to_constant (TImode, arr));
4855 emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
4856
4857 DONE;
4858 })
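;; Selector bytes of the form 0x80 make shufb produce a 0x00 result byte,
;; so the mask above zero-extends without needing a separate zero register.
;; The remaining unpack expanders below work the same way; the signed ones
;; shuffle first and then sign-extend with xshw/xsbh.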
4859
4860 (define_expand "vec_unpacku_lo_v8hi"
4861 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4862 (zero_extend:V4SI
4863 (vec_select:V4HI
4864 (match_operand:V8HI 1 "spu_reg_operand" "r")
4865 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
4866 ""
4867 {
4868 rtx mask = gen_reg_rtx (TImode);
4869 unsigned char arr[16] = {
4870 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
4871 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
4872
4873 emit_move_insn (mask, array_to_constant (TImode, arr));
4874 emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
4875
4876 DONE;
4877 })
4878
4879 (define_expand "vec_unpacks_hi_v8hi"
4880 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4881 (sign_extend:V4SI
4882 (vec_select:V4HI
4883 (match_operand:V8HI 1 "spu_reg_operand" "r")
4884 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
4885 ""
4886 {
4887 rtx tmp1 = gen_reg_rtx (V8HImode);
4888 rtx tmp2 = gen_reg_rtx (V4SImode);
4889 rtx mask = gen_reg_rtx (TImode);
4890 unsigned char arr[16] = {
4891 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
4892 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
4893
4894 emit_move_insn (mask, array_to_constant (TImode, arr));
4895 emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
4896 emit_insn (gen_spu_xshw (tmp2, tmp1));
4897 emit_move_insn (operands[0], tmp2);
4898
4899 DONE;
4900 })
4901
4902 (define_expand "vec_unpacks_lo_v8hi"
4903 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4904 (sign_extend:V4SI
4905 (vec_select:V4HI
4906 (match_operand:V8HI 1 "spu_reg_operand" "r")
4907 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
4908 ""
4909 {
4910 rtx tmp1 = gen_reg_rtx (V8HImode);
4911 rtx tmp2 = gen_reg_rtx (V4SImode);
4912 rtx mask = gen_reg_rtx (TImode);
4913 unsigned char arr[16] = {
4914 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
4915 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
4916
4917 emit_move_insn (mask, array_to_constant (TImode, arr));
4918 emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
4919 emit_insn (gen_spu_xshw (tmp2, tmp1));
4920 emit_move_insn (operands[0], tmp2);
4921
4922 DONE;
4923 })
4924
4925 (define_expand "vec_unpacku_hi_v16qi"
4926 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
4927 (zero_extend:V8HI
4928 (vec_select:V8QI
4929 (match_operand:V16QI 1 "spu_reg_operand" "r")
4930 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
4931 (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
4932 ""
4933 {
4934 rtx mask = gen_reg_rtx (TImode);
4935 unsigned char arr[16] = {
4936 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
4937 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
4938
4939 emit_move_insn (mask, array_to_constant (TImode, arr));
4940 emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
4941
4942 DONE;
4943 })
4944
4945 (define_expand "vec_unpacku_lo_v16qi"
4946 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
4947 (zero_extend:V8HI
4948 (vec_select:V8QI
4949 (match_operand:V16QI 1 "spu_reg_operand" "r")
4950 (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
4951 (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
4952 ""
4953 {
4954 rtx mask = gen_reg_rtx (TImode);
4955 unsigned char arr[16] = {
4956 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
4957 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
4958
4959 emit_move_insn (mask, array_to_constant (TImode, arr));
4960 emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
4961
4962 DONE;
4963 })
4964
4965 (define_expand "vec_unpacks_hi_v16qi"
4966 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
4967 (sign_extend:V8HI
4968 (vec_select:V8QI
4969 (match_operand:V16QI 1 "spu_reg_operand" "r")
4970 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
4971 (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
4972 ""
4973 {
4974 rtx tmp1 = gen_reg_rtx (V16QImode);
4975 rtx tmp2 = gen_reg_rtx (V8HImode);
4976 rtx mask = gen_reg_rtx (TImode);
4977 unsigned char arr[16] = {
4978 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
4979 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
4980
4981 emit_move_insn (mask, array_to_constant (TImode, arr));
4982 emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
4983 emit_insn (gen_spu_xsbh (tmp2, tmp1));
4984 emit_move_insn (operands[0], tmp2);
4985
4986 DONE;
4987 })
4988
4989 (define_expand "vec_unpacks_lo_v16qi"
4990 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
4991 (sign_extend:V8HI
4992 (vec_select:V8QI
4993 (match_operand:V16QI 1 "spu_reg_operand" "r")
4994 (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
4995 (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
4996 ""
4997 {
4998 rtx tmp1 = gen_reg_rtx (V16QImode);
4999 rtx tmp2 = gen_reg_rtx (V8HImode);
5000 rtx mask = gen_reg_rtx (TImode);
5001 unsigned char arr[16] = {
5002 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
5003 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
5004
5005 emit_move_insn (mask, array_to_constant (TImode, arr));
5006 emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
5007 emit_insn (gen_spu_xsbh (tmp2, tmp1));
5008 emit_move_insn (operands[0], tmp2);
5009
5010 DONE;
5011 })
5012
5013 \f
5014 (define_expand "vec_pack_trunc_v8hi"
5015 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
5016 (vec_concat:V16QI
5017 (truncate:V8QI (match_operand:V8HI 1 "spu_reg_operand" "r"))
5018 (truncate:V8QI (match_operand:V8HI 2 "spu_reg_operand" "r"))))]
5019 ""
5020 "
5021 {
5022 rtx mask = gen_reg_rtx (TImode);
5023 unsigned char arr[16] = {
5024 0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
5025 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F};
5026
5027 emit_move_insn (mask, array_to_constant (TImode, arr));
5028 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
5029
5030 DONE;
5031 }")
5032
5033 (define_expand "vec_pack_trunc_v4si"
5034 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
5035 (vec_concat:V8HI
5036 (truncate:V4HI (match_operand:V4SI 1 "spu_reg_operand" "r"))
5037 (truncate:V4HI (match_operand:V4SI 2 "spu_reg_operand" "r"))))]
5038 ""
5039 "
5040 {
5041 rtx mask = gen_reg_rtx (TImode);
5042 unsigned char arr[16] = {
5043 0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
5044 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F};
5045
5046 emit_move_insn (mask, array_to_constant (TImode, arr));
5047 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
5048
5049 DONE;
5050 }")
5051
5052 (define_insn "stack_protect_set"
5053 [(set (match_operand:SI 0 "memory_operand" "=m")
5054 (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
5055 (set (match_scratch:SI 2 "=&r") (const_int 0))]
5056 ""
5057 "lq%p1\t%2,%1\;stq%p0\t%2,%0\;xor\t%2,%2,%2"
5058 [(set_attr "length" "12")
5059 (set_attr "type" "multi1")]
5060 )
5061
5062 (define_expand "stack_protect_test"
5063 [(match_operand 0 "memory_operand" "")
5064 (match_operand 1 "memory_operand" "")
5065 (match_operand 2 "" "")]
5066 ""
5067 {
5068 rtx compare_result;
5069 rtx bcomp, loc_ref;
5070
5071 compare_result = gen_reg_rtx (SImode);
5072
5073 emit_insn (gen_stack_protect_test_si (compare_result,
5074 operands[0],
5075 operands[1]));
5076
5077 bcomp = gen_rtx_NE (SImode, compare_result, const0_rtx);
5078
5079 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[2]);
5080
5081 emit_jump_insn (gen_rtx_SET (pc_rtx,
5082 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
5083 loc_ref, pc_rtx)));
5084
5085 DONE;
5086 })
5087
5088 (define_insn "stack_protect_test_si"
5089 [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
5090 (unspec:SI [(match_operand:SI 1 "memory_operand" "m")
5091 (match_operand:SI 2 "memory_operand" "m")]
5092 UNSPEC_SP_TEST))
5093 (set (match_scratch:SI 3 "=&r") (const_int 0))]
5094 ""
5095 "lq%p1\t%0,%1\;lq%p2\t%3,%2\;ceq\t%0,%0,%3\;xor\t%3,%3,%3"
5096 [(set_attr "length" "16")
5097 (set_attr "type" "multi1")]
5098 )
5099
5100 ; Atomic operations
5101 ;
5102 ; SPU execution is always single-threaded, so there is no need for real
5103 ; atomic operations. We provide the atomic primitives anyway so that
5104 ; code expecting the builtins to be present (like libgfortran) will work.
5105
;; Types that we should provide atomic instructions for.
(define_mode_iterator AINT [QI HI SI DI TI])

(define_code_iterator ATOMIC [plus minus ior xor and mult])
(define_code_attr atomic_name
  [(plus "add") (minus "sub")
   (ior "or") (xor "xor") (and "and") (mult "nand")])
(define_code_attr atomic_pred
  [(plus "spu_arith_operand") (minus "spu_reg_operand")
   (ior "spu_logical_operand") (xor "spu_logical_operand")
   (and "spu_logical_operand") (mult "spu_logical_operand")])

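;; Note that the ATOMIC code iterator reuses MULT to stand for NAND (there
;; is no nand RTX code); <atomic_name> maps it to "nand", and
;; spu_expand_atomic_op is expected to treat MULT accordingly.  All of the
;; expanders below FAIL when the memory operand is in a non-generic address
;; space (__ea memory in this port), leaving those accesses to the middle
;; end's generic handling.
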
(define_expand "atomic_load<mode>"
  [(set (match_operand:AINT 0 "spu_reg_operand" "")       ;; output
        (match_operand:AINT 1 "memory_operand" ""))       ;; memory
   (use (match_operand:SI 2 "const_int_operand" ""))]     ;; model
  ""
{
  if (MEM_ADDR_SPACE (operands[1]))
    FAIL;

  emit_move_insn (operands[0], operands[1]);
  DONE;
})

(define_expand "atomic_store<mode>"
  [(set (match_operand:AINT 0 "memory_operand" "")        ;; memory
        (match_operand:AINT 1 "spu_reg_operand" ""))      ;; input
   (use (match_operand:SI 2 "const_int_operand" ""))]     ;; model
  ""
{
  if (MEM_ADDR_SPACE (operands[0]))
    FAIL;

  emit_move_insn (operands[0], operands[1]);
  DONE;
})

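;; Compare-and-swap is expanded as a straight-line load, compare-and-branch
;; and conditionally skipped store; the boolean result (operands[0]) is 1
;; only when the store was performed.  This is race-free only because SPU
;; code is single-threaded.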
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:SI 0 "spu_reg_operand" "")          ;; bool out
   (match_operand:AINT 1 "spu_reg_operand" "")        ;; val out
   (match_operand:AINT 2 "memory_operand" "")         ;; memory
   (match_operand:AINT 3 "spu_nonmem_operand" "")     ;; expected
   (match_operand:AINT 4 "spu_nonmem_operand" "")     ;; desired
   (match_operand:SI 5 "const_int_operand" "")        ;; is_weak
   (match_operand:SI 6 "const_int_operand" "")        ;; model succ
   (match_operand:SI 7 "const_int_operand" "")]       ;; model fail
  ""
{
  rtx boolval, retval, label;

  if (MEM_ADDR_SPACE (operands[2]))
    FAIL;

  boolval = gen_reg_rtx (SImode);
  retval = gen_reg_rtx (<MODE>mode);
  label = gen_label_rtx ();

  emit_move_insn (retval, operands[2]);
  emit_move_insn (boolval, const0_rtx);

  emit_cmp_and_jump_insns (retval, operands[3], NE, NULL_RTX,
                           <MODE>mode, 1, label);

  emit_move_insn (operands[2], operands[4]);
  emit_move_insn (boolval, const1_rtx);

  emit_label (label);

  emit_move_insn (operands[0], boolval);
  emit_move_insn (operands[1], retval);
  DONE;
})

(define_expand "atomic_exchange<mode>"
  [(match_operand:AINT 0 "spu_reg_operand" "")        ;; output
   (match_operand:AINT 1 "memory_operand" "")         ;; memory
   (match_operand:AINT 2 "spu_nonmem_operand" "")     ;; input
   (match_operand:SI 3 "const_int_operand" "")]       ;; model
  ""
{
  rtx retval;

  if (MEM_ADDR_SPACE (operands[1]))
    FAIL;

  retval = gen_reg_rtx (<MODE>mode);

  emit_move_insn (retval, operands[1]);
  emit_move_insn (operands[1], operands[2]);
  emit_move_insn (operands[0], retval);
  DONE;
})

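;; The three read-modify-write expanders below differ only in what they
;; return: atomic_<op> returns nothing, atomic_fetch_<op> the value read
;; before the operation, and atomic_<op>_fetch the value after it.  Judging
;; from the calls, the last two arguments of spu_expand_atomic_op select
;; whether the old or the new value is copied into operands[0].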
(define_expand "atomic_<atomic_name><mode>"
  [(ATOMIC:AINT
     (match_operand:AINT 0 "memory_operand" "")       ;; memory
     (match_operand:AINT 1 "<atomic_pred>" ""))       ;; operand
   (match_operand:SI 2 "const_int_operand" "")]       ;; model
  ""
{
  if (MEM_ADDR_SPACE (operands[0]))
    FAIL;

  spu_expand_atomic_op (<CODE>, operands[0], operands[1],
                        NULL_RTX, NULL_RTX);
  DONE;
})

(define_expand "atomic_fetch_<atomic_name><mode>"
  [(match_operand:AINT 0 "spu_reg_operand" "")        ;; output
   (ATOMIC:AINT
     (match_operand:AINT 1 "memory_operand" "")       ;; memory
     (match_operand:AINT 2 "<atomic_pred>" ""))       ;; operand
   (match_operand:SI 3 "const_int_operand" "")]       ;; model
  ""
{
  if (MEM_ADDR_SPACE (operands[1]))
    FAIL;

  spu_expand_atomic_op (<CODE>, operands[1], operands[2],
                        operands[0], NULL_RTX);
  DONE;
})

(define_expand "atomic_<atomic_name>_fetch<mode>"
  [(match_operand:AINT 0 "spu_reg_operand" "")        ;; output
   (ATOMIC:AINT
     (match_operand:AINT 1 "memory_operand" "")       ;; memory
     (match_operand:AINT 2 "<atomic_pred>" ""))       ;; operand
   (match_operand:SI 3 "const_int_operand" "")]       ;; model
  ""
{
  if (MEM_ADDR_SPACE (operands[1]))
    FAIL;

  spu_expand_atomic_op (<CODE>, operands[1], operands[2],
                        NULL_RTX, operands[0]);
  DONE;
})
