From 6fb122754c7650e4b766619e8fe8a63ba777c41a Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 4 Apr 2014 13:39:27 +0000 Subject: [PATCH] Backport PRs 57615, 57744, 58160, 58587, 58673, 59054 [gcc] 2014-04-04 Bill Schmidt Apply mainline 2013-11-23 Alan Modra * config/rs6000/vsx.md (fusion peepholes): Disable when !TARGET_VSX. Backport from mainline 2013-11-12 Michael Meissner PR target/59054 * config/rs6000/rs6000.md (movdi_internal32): Eliminate constraints that would allow DImode into the traditional Altivec registers, but cause undesirable code generation when loading 0 as a constant. (movdi_internal64): Likewise. (cmp_fpr): Do not use %x for CR register output. (extendsfdf2_fpr): Fix constraints when -mallow-upper-df and -mallow-upper-sf debug switches are used. Backport from mainline 2013-10-17 Michael Meissner * config/rs6000/rs6000.c (enum rs6000_reload_reg_type): Add new fields to the reg_addr array that describes the valid addressing mode for any register, general purpose registers, floating point registers, and Altivec registers. (FIRST_RELOAD_REG_CLASS): Likewise. (LAST_RELOAD_REG_CLASS): Likewise. (struct reload_reg_map_type): Likewise. (reload_reg_map_type): Likewise. (RELOAD_REG_VALID): Likewise. (RELOAD_REG_MULTIPLE): Likewise. (RELOAD_REG_INDEXED): Likewise. (RELOAD_REG_OFFSET): Likewise. (RELOAD_REG_PRE_INCDEC): Likewise. (RELOAD_REG_PRE_MODIFY): Likewise. (reg_addr): Likewise. (mode_supports_pre_incdec_p): New helper functions to say whether a given mode supports PRE_INC, PRE_DEC, and PRE_MODIFY. (mode_supports_pre_modify_p): Likewise. (rs6000_debug_vector_unit): Rearrange the -mdebug=reg output to print the valid address mode bits for each mode. (rs6000_debug_print_mode): Likewise. (rs6000_debug_reg_global): Likewise. (rs6000_setup_reg_addr_masks): New function to set up the address mask bits for each type. (rs6000_init_hard_regno_mode_ok): Use memset to clear arrays. Call rs6000_setup_reg_addr_masks to set up the address mask bits. 
(rs6000_legitimate_address_p): Use mode_supports_pre_incdec_p and mode_supports_pre_modify_p to determine if PRE_INC, PRE_DEC, and PRE_MODIFY are supported. (rs6000_output_move_128bit): Change to use {src,dest}_vmx_p for altivec registers, instead of {src,dest}_av_p. (rs6000_print_options_internal): Tweak the debug output slightly. Backport from mainline 2013-10-03 Michael Meissner * config/rs6000/rs6000-builtin.def (XSRDPIM): Use floatdf2, ceildf2, btruncdf2, instead of vsx_* name. * config/rs6000/vsx.md (vsx_add3): Change arithmetic iterators to only do V2DF and V4SF here. Move the DF code to rs6000.md where it is combined with SF mode. Replace with just 'v' since only vector operations are handled with these insns after moving the DF support to rs6000.md. (vsx_sub3): Likewise. (vsx_mul3): Likewise. (vsx_div3): Likewise. (vsx_fre2): Likewise. (vsx_neg2): Likewise. (vsx_abs2): Likewise. (vsx_nabs2): Likewise. (vsx_smax3): Likewise. (vsx_smin3): Likewise. (vsx_sqrt2): Likewise. (vsx_rsqrte2): Likewise. (vsx_fms4): Likewise. (vsx_nfma4): Likewise. (vsx_copysign3): Likewise. (vsx_btrunc2): Likewise. (vsx_floor2): Likewise. (vsx_ceil2): Likewise. (vsx_smaxsf3): Delete scalar ops that were moved to rs6000.md. (vsx_sminsf3): Likewise. (vsx_fmadf4): Likewise. (vsx_fmsdf4): Likewise. (vsx_nfmadf4): Likewise. (vsx_nfmsdf4): Likewise. (vsx_cmpdf_internal1): Likewise. * config/rs6000/rs6000.h (TARGET_SF_SPE): Define macros to make it simpler to select whether a target has SPE or traditional floating point support in iterators. (TARGET_DF_SPE): Likewise. (TARGET_SF_FPR): Likewise. (TARGET_DF_FPR): Likewise. (TARGET_SF_INSN): Macros to say whether floating point support exists for a given operation for expanders. (TARGET_DF_INSN): Likewise. * config/rs6000/rs6000.c (Ftrad): New mode attributes to allow combining of SF/DF mode operations, using both traditional and VSX registers. (Fvsx): Likewise. (Ff): Likewise. (Fv): Likewise. (Fs): Likewise. (Ffre): Likewise. 
(FFRE): Likewise. (abs2): Combine SF/DF modes using traditional floating point instructions. Add support for using the upper DF registers with VSX support, and SF registers with power8-vector support. Update expanders for operations supported by both the SPE and traditional floating point units. (abs2_fpr): Likewise. (nabs2): Likewise. (nabs2_fpr): Likewise. (neg2): Likewise. (neg2_fpr): Likewise. (add3): Likewise. (add3_fpr): Likewise. (sub3): Likewise. (sub3_fpr): Likewise. (mul3): Likewise. (mul3_fpr): Likewise. (div3): Likewise. (div3_fpr): Likewise. (sqrt3): Likewise. (sqrt3_fpr): Likewise. (fre): Likewise. (rsqrt2): Likewise. (cmp_fpr): Likewise. (smax3): Likewise. (smin3): Likewise. (smax3_vsx): Likewise. (smin3_vsx): Likewise. (negsf2): Delete SF operations that are merged with DF. (abssf2): Likewise. (addsf3): Likewise. (subsf3): Likewise. (mulsf3): Likewise. (divsf3): Likewise. (fres): Likewise. (fmasf4_fpr): Likewise. (fmssf4_fpr): Likewise. (nfmasf4_fpr): Likewise. (nfmssf4_fpr): Likewise. (sqrtsf2): Likewise. (rsqrtsf_internal1): Likewise. (smaxsf3): Likewise. (sminsf3): Likewise. (cmpsf_internal1): Likewise. (copysign3_fcpsgn): Add VSX/power8-vector support. (negdf2): Delete DF operations that are merged with SF. (absdf2): Likewise. (nabsdf2): Likewise. (adddf3): Likewise. (subdf3): Likewise. (muldf3): Likewise. (divdf3): Likewise. (fred): Likewise. (rsqrtdf_internal1): Likewise. (fmadf4_fpr): Likewise. (fmsdf4_fpr): Likewise. (nfmadf4_fpr): Likewise. (nfmsdf4_fpr): Likewise. (sqrtdf2): Likewise. (smaxdf3): Likewise. (smindf3): Likewise. (cmpdf_internal1): Likewise. (lrintdi2): Use TARGET__FPR macro. (btrunc2): Delete separate expander, and combine with the insn and add VSX instruction support. Use TARGET__FPR. (btrunc2_fpr): Likewise. (ceil2): Likewise. (ceil2_fpr): Likewise. (floor2): Likewise. (floor2_fpr): Likewise. (fma4_fpr): Combine SF and DF fused multiply/add support. Add support for using the upper registers with VSX and power8-vector. 
Move insns to be closer to the define_expands. On VSX systems, prefer the traditional form of FMA over the VSX version, since the traditional form allows the target not to overlap with the inputs. (fms4_fpr): Likewise. (nfma4_fpr): Likewise. (nfms4_fpr): Likewise. Backport from mainline 2013-09-27 Michael Meissner * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Allow DFmode, DImode, and SFmode in the upper VSX registers based on the -mupper-regs-{df,sf} flags. Fix wu constraint to be ALTIVEC_REGS if -mpower8-vector. Combine -mvsx-timode handling with the rest of the VSX register handling. * config/rs6000/rs6000.md (f32_lv): Use %x0 for VSX regsters. (f32_sv): Likewise. (zero_extendsidi2_lfiwzx): Add support for loading into the Altivec registers with -mpower8-vector. Use wu/wv constraints to only do VSX memory options on Altivec registers. (extendsidi2_lfiwax): Likewise. (extendsfdf2_fpr): Likewise. (mov_hardfloat, SF/SD modes): Likewise. (mov_hardfloat32, DF/DD modes): Likewise. (mov_hardfloat64, DF/DD modes): Likewise. (movdi_internal64): Likewise. Backport from mainline 2013-09-23 Michael Meissner * config/rs6000/rs6000.c (rs6000_vector_reload): Delete, combine reload helper function arrays into a single array reg_addr. (reload_fpr_gpr): Likewise. (reload_gpr_vsx): Likewise. (reload_vsx_gpr): Likewise. (struct rs6000_reg_addr): Likewise. (reg_addr): Likewise. (rs6000_debug_reg_global): Change rs6000_vector_reload, reload_fpr_gpr, reload_gpr_vsx, reload_vsx_gpr uses to reg_addr. (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_secondary_reload_direct_move): Likewise. (rs6000_secondary_reload): Likewise. * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add new constraints: wu, ww, and wy. Repurpose wv constraint added during power8 changes. Put wg constraint in alphabetical order. * config/rs6000/rs6000.opt (-mvsx-scalar-float): New debug switch for future work to add ISA 2.07 VSX single precision support. 
(-mvsx-scalar-double): Change default from -1 to 1, update documentation comment. (-mvsx-scalar-memory): Rename debug switch to -mupper-regs-df. (-mupper-regs-df): New debug switch to control whether DF values can go in the traditional Altivec registers. (-mupper-regs-sf): New debug switch to control whether SF values can go in the traditional Altivec registers. * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print wu, ww, and wy constraints. (rs6000_init_hard_regno_mode_ok): Use ssize_t instead of int for loop variables. Rename -mvsx-scalar-memory to -mupper-regs-df. Add new constraints, wu/ww/wy. Repurpose wv constraint. (rs6000_debug_legitimate_address_p): Print if we are running before, during, or after reload. (rs6000_secondary_reload): Add a comment. (rs6000_opt_masks): Add -mupper-regs-df, -mupper-regs-sf. * config/rs6000/constraints.md (wa constraint): Sort w constraints. Update documentation string. (wd constraint): Likewise. (wf constraint): Likewise. (wg constraint): Likewise. (wn constraint): Likewise. (ws constraint): Likewise. (wt constraint): Likewise. (wx constraint): Likewise. (wz constraint): Likewise. (wu constraint): New constraint for ISA 2.07 SFmode scalar instructions. (ww constraint): Likewise. (wy constraint): Likewise. (wv constraint): Repurpose ISA 2.07 constraint that was not used in the previous submissions. * doc/md.texi (PowerPC and IBM RS6000): Likewise. Backport from mainline 2013-10-17 Michael Meissner PR target/58673 * config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only restrict TImode addresses to single indirect registers if both -mquad-memory and -mvsx-timode are used. (rs6000_output_move_128bit): Use quad_load_store_p to determine if we should emit load/store quad. Remove using %y for quad memory addresses. * config/rs6000/rs6000.md (mov_ppc64, TI/PTImode): Add constraints to allow load/store quad on machines where TImode is not allowed in VSX registers. 
Use 'n' instead of 'F' constraint for TImode to load integer constants. Backport from mainline 2013-10-02 Michael Meissner PR target/58587 * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Turn off setting -mvsx-timode by default until the underlying problem is fixed. (RS6000_CPU, power7 defaults): Likewise. Backport from trunk 2013-08-16 Michael Meissner PR target/58160 * config/rs6000/predicates.md (fusion_gpr_mem_load): Allow the memory rtx to contain ZERO_EXTEND and SIGN_EXTEND. * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Pass operands array instead of each individual operand as a separate argument. (emit_fusion_gpr_load): Likewise. (expand_fusion_gpr_load): Add new function declaration. * config/rs6000/rs6000.c (fusion_gpr_load_p): Change the calling signature to have the operands passed as an array, instead of as separate arguments. Allow ZERO_EXTEND to be in the memory address, and also SIGN_EXTEND if -mpower8-fusion-sign. Do not depend on the register live/dead flags when peepholes are run. (expand_fusion_gpr_load): New function to be called from the peephole2 pass, to change the register that addis sets to be the target register. (emit_fusion_gpr_load): Change the calling signature to have the operands passed as an array, instead of as separate arguments. Allow ZERO_EXTEND to be in the memory address, and also SIGN_EXTEND if -mpower8-fusion-sign. * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): Delete unused unspec enumeration. (power8 fusion peephole/peephole2): Rework the fusion peepholes to adjust the register addis loads up in the peephole2 pass. Do not depend on the register live/dead state when the peephole pass is done. Backport from trunk 2013-07-23 Michael Meissner * config/rs6000/vector.md (xor3): Move 128-bit boolean expanders to rs6000.md. (ior3): Likewise. (and3): Likewise. (one_cmpl2): Likewise. (nor3): Likewise. (andc3): Likewise. (eqv3): Likewise. (nand3): Likewise. (orc3): Likewise. 
* config/rs6000/rs6000-protos.h (rs6000_split_logical): New declaration. * config/rs6000/rs6000.c (rs6000_split_logical_inner): Add support to split multi-word logical operations. (rs6000_split_logical_di): Likewise. (rs6000_split_logical): Likewise. * config/rs6000/vsx.md (VSX_L2): Delete, no longer used. (vsx_and3_32bit): Move 128-bit logical insns to rs6000.md, and allow TImode operations in 32-bit. (vsx_and3_64bit): Likewise. (vsx_ior3_32bit): Likewise. (vsx_ior3_64bit): Likewise. (vsx_xor3_32bit): Likewise. (vsx_xor3_64bit): Likewise. (vsx_one_cmpl2_32bit): Likewise. (vsx_one_cmpl2_64bit): Likewise. (vsx_nor3_32bit): Likewise. (vsx_nor3_64bit): Likewise. (vsx_andc3_32bit): Likewise. (vsx_andc3_64bit): Likewise. (vsx_eqv3_32bit): Likewise. (vsx_eqv3_64bit): Likewise. (vsx_nand3_32bit): Likewise. (vsx_nand3_64bit): Likewise. (vsx_orc3_32bit): Likewise. (vsx_orc3_64bit): Likewise. * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Always allow vector logical types in GPRs. * config/rs6000/altivec.md (altivec_and3): Move 128-bit logical insns to rs6000.md, and allow TImode operations in 32-bit. (altivec_ior3): Likewise. (altivec_xor3): Likewise. (altivec_one_cmpl2): Likewise. (altivec_nor3): Likewise. (altivec_andc3): Likewise. * config/rs6000/rs6000.md (BOOL_128): New mode iterators and mode attributes for moving the 128-bit logical operations into rs6000.md. (BOOL_REGS_OUTPUT): Likewise. (BOOL_REGS_OP1): Likewise. (BOOL_REGS_OP2): Likewise. (BOOL_REGS_UNARY): Likewise. (BOOL_REGS_AND_CR0): Likewise. (one_cmpl2): Add support for DI logical operations on 32-bit, splitting the operations to 32-bit. (anddi3): Likewise. (iordi3): Likewise. (xordi3): Likewise. (and3, 128-bit types): Rewrite 2013-06-06 logical operator changes to combine the 32/64-bit code, allow logical operations on TI mode in 32-bit, and to use similar match_operator patterns like scalar mode uses. Combine the Altivec and VSX code for logical operations, and move it here. (ior3, 128-bit types): Likewise. 
(xor3, 128-bit types): Likewise. (one_cmpl3, 128-bit types): Likewise. (nor3, 128-bit types): Likewise. (andc3, 128-bit types): Likewise. (eqv3, 128-bit types): Likewise. (nand3, 128-bit types): Likewise. (orc3, 128-bit types): Likewise. (and3_internal): Likewise. (bool3_internal): Likewise. (boolc3_internal1): Likewise. (boolc3_internal2): Likewise. (boolcc3_internal1): Likewise. (boolcc3_internal2): Likewise. (eqv3_internal1): Likewise. (eqv3_internal2): Likewise. (one_cmpl13_internal): Likewise. Back port from mainline: 2013-06-06 Michael Meissner Pat Haugen Peter Bergner * lib/target-supports.exp (check_p8vector_hw_available) Add power8 support. (check_effective_target_powerpc_p8vector_ok): Likewise. (is-effective-target): Likewise. (check_vect_support_and_set_flags): Likewise. Backport from mainline 2013-07-31 Michael Meissner * config/rs6000/predicates.md (fusion_gpr_addis): New predicates to support power8 load fusion. (fusion_gpr_mem_load): Likewise. * config/rs6000/rs6000-modes.def (PTImode): Update a comment. * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New declarations for power8 load fusion. (emit_fusion_gpr_load): Likewise. * config/rs6000/rs6000.c (rs6000_option_override_internal): If tuning for power8, turn on fusion mode by default. Turn on sign extending fusion mode if normal fusion mode is on, and we are at -O2 or -O3. (fusion_gpr_load_p): New function, return true if we can fuse an addis instruction with a dependent load to a GPR. (emit_fusion_gpr_load): Emit the instructions for power8 load fusion to GPRs. * config/rs6000/vsx.md (VSX_M2): New iterator for fusion peepholes. (VSX load fusion peepholes): New peepholes to fuse together an addi instruction with a VSX load instruction. * config/rs6000/rs6000.md (GPR load fusion peepholes): New peepholes to fuse an addis instruction with a load to a GPR base register. 
If we are supporting sign extending fusions, convert sign extending loads to zero extending loads and add an explicit sign extension. Backport from mainline 2013-07-18 Pat Haugen * config/rs6000/rs6000.c (rs6000_option_override_internal): Adjust flag interaction for new Power8 flags and VSX. Back port from the trunk 2013-06-28 Michael Meissner PR target/57744 * config/rs6000/rs6000.h (MODES_TIEABLE_P): Do not allow PTImode to tie with any other modes. Eliminate Altivec vector mode tests, since these are a subset of ALTIVEC or VSX vector modes. Simplify code, to return 0 if testing MODE2 for a condition, if we've already tested MODE1 for the same condition. Backport from mainline 2013-06-28 Pat Haugen * config/rs6000/rs6000.md (define_insn ""): Fix insn type. Back port from the trunk 2013-06-26 Michael Meissner Pat Haugen Peter Bergner * config/rs6000/power8.md: New. * config/rs6000/rs6000-cpus.def (RS6000_CPU table): Adjust processor setting for power8 entry. * config/rs6000/t-rs6000 (MD_INCLUDES): Add power8.md. * config/rs6000/rs6000.c (is_microcoded_insn, is_cracked_insn): Adjust test for Power4/Power5 only. (insn_must_be_first_in_group, insn_must_be_last_in_group): Add Power8 support. (force_new_group): Adjust comment. * config/rs6000/rs6000.md: Include power8.md. Back port from the trunk 2013-06-14 Michael Meissner PR target/57615 * config/rs6000/rs6000.md (mov_ppc64): Call rs6000_output_move_128bit to handle emitting quad memory operations. Set attribute length to 8 bytes. Back port from the trunk 2013-06-13 Michael Meissner * config/rs6000/rs6000.c (rs6000_option_override_internal): Move test for clearing quad memory on 32-bit later. Back port from the trunk 2013-06-12 Michael Meissner Pat Haugen Peter Bergner * config/rs6000/rs6000.c (emit_load_locked): Add support for power8 byte, half-word, and quad-word atomic instructions. (emit_store_conditional): Likewise. (rs6000_expand_atomic_compare_and_swap): Likewise. (rs6000_expand_atomic_op): Likewise. 
* config/rs6000/sync.md (larx): Add new modes for power8. (stcx): Likewise. (AINT): New mode iterator to include TImode as well as normal integer modes on power8. (fetchop_pred): Use int_reg_operand instead of gpc_reg_operand so that VSX registers are not considered. Use AINT mode iterator instead of INT1 to allow inclusion of quad word atomic operations on power8. (load_locked): Likewise. (store_conditional): Likewise. (atomic_compare_and_swap): Likewise. (atomic_exchange): Likewise. (atomic_nand): Likewise. (atomic_fetch_): Likewise. (atomic_nand_fetch): Likewise. (mem_thread_fence): Use gen_loadsync_ instead of enumerating each type. (ATOMIC): On power8, add QImode, HImode modes. (load_locked_si): Variants of load_locked for QI/HI modes that promote to SImode. (load_lockedti): Convert TImode arguments to PTImode, so that we get a guaranteed even/odd register pair. (load_lockedpti): Likewise. (store_conditionalti): Likewise. (store_conditionalpti): Likewise. * config/rs6000/rs6000.md (QHI): New mode iterator for power8 atomic load/store instructions. (HSI): Likewise. Back port from the trunk 2013-06-10 Michael Meissner Pat Haugen Peter Bergner * config/rs6000/vector.md (GPR move splitter): Do not split moves of vectors in GPRs if they are direct moves or quad word load or store moves. * config/rs6000/rs6000-protos.h (rs6000_output_move_128bit): Add declaration. (direct_move_p): Likewise. (quad_load_store_p): Likewise. * config/rs6000/rs6000.c (enum rs6000_reg_type): Simplify register classes into bins based on the physical register type. (reg_class_to_reg_type): Likewise. (IS_STD_REG_TYPE): Likewise. (IS_FP_VECT_REG_TYPE): Likewise. (reload_fpr_gpr): Arrays to determine what insn to use if we can use direct move instructions. (reload_gpr_vsx): Likewise. (reload_vsx_gpr): Likewise. (rs6000_init_hard_regno_mode_ok): Precalculate the register type information that is a simplification of register classes. Also precalculate direct move reload helpers. 
(direct_move_p): New function to return true if the operation can be done as a direct move instruction. (quad_load_store_p): New function to return true if the operation is a quad memory operation. (rs6000_legitimize_address): If quad memory, only allow register indirect for TImode addresses. (rs6000_legitimate_address_p): Likewise. (enum reload_reg_type): Delete, replace with rs6000_reg_type. (rs6000_reload_register_type): Likewise. (register_to_reg_type): Return register type. (rs6000_secondary_reload_simple_move): New helper function for secondary reload and secondary memory needed to identify anything that is a simple move, and does not need reloading. (rs6000_secondary_reload_direct_move): New helper function for secondary reload to identify cases that can be done with several instructions via the direct move instructions. (rs6000_secondary_reload_move): New helper function for secondary reload to identify moves between register types that can be done. (rs6000_secondary_reload): Add support for quad memory operations and for direct move. (rs6000_secondary_memory_needed): Likewise. (rs6000_debug_secondary_memory_needed): Change argument names. (rs6000_output_move_128bit): New function to return the move to use for 128-bit moves, including knowing about the various limitations of quad memory operations. * config/rs6000/vsx.md (vsx_mov): Add support for quad memory operations. Call rs6000_output_move_128bit for the actual instruction(s) to generate. (vsx_movti_64bit): Likewise. * config/rs6000/rs6000.md (UNSPEC_P8V_FMRGOW): New unspec values. (UNSPEC_P8V_MTVSRWZ): Likewise. (UNSPEC_P8V_RELOAD_FROM_GPR): Likewise. (UNSPEC_P8V_MTVSRD): Likewise. (UNSPEC_P8V_XXPERMDI): Likewise. (UNSPEC_P8V_RELOAD_FROM_VSX): Likewise. (UNSPEC_FUSION_GPR): Likewise. (FMOVE128_GPR): New iterator for direct move. (f32_lv): New mode attribute for load/store of SFmode/SDmode values. (f32_sv): Likewise. (f32_dm): Likewise. 
(zero_extenddi2_internal1): Add support for power8 32-bit loads and direct move instructions. (zero_extendsidi2_lfiwzx): Likewise. (extendsidi2_lfiwax): Likewise. (extendsidi2_nocell): Likewise. (floatsi2_lfiwax): Likewise. (lfiwax): Likewise. (floatunssi2_lfiwzx): Likewise. (lfiwzx): Likewise. (fix_trunc_stfiwx): Likewise. (fixuns_trunc_stfiwx): Likewise. (mov_hardfloat, 32-bit floating point): Likewise. (mov_hardfloat64, 64-bit floating point): Likewise. (parity2_cmpb): Set length/type attr. (unnamed shift right patterns, mov_internal2): Change type attr for 'mr.' to fast_compare. (bpermd_): Change type attr to popcnt. (p8_fmrgow_): New insns for power8 direct move support. (p8_mtvsrwz_1): Likewise. (p8_mtvsrwz_2): Likewise. (reload_fpr_from_gpr): Likewise. (p8_mtvsrd_1): Likewise. (p8_mtvsrd_2): Likewise. (p8_xxpermdi_): Likewise. (reload_vsx_from_gpr): Likewise. (reload_vsx_from_gprsf): Likewise. (p8_mfvsrd_3_): Likewise. (reload_gpr_from_vsx): Likewise. (reload_gpr_from_vsxsf): Likewise. (p8_mfvsrd_4_disf): Likewise. (multi-word GPR splits): Do not split direct moves or quad memory operations. Backport from the trunk 2013-06-06 Michael Meissner Pat Haugen Peter Bergner * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Document new power8 builtins. * config/rs6000/vector.md (and3): Add a clobber/scratch of a condition code register, to allow 128-bit logical operations to be done in the VSX or GPR registers. (nor3): Use the canonical form for nor. (eqv3): Add expanders for power8 xxleqv, xxlnand, xxlorc, vclz*, and vpopcnt* vector instructions. (nand3): Likewise. (orc3): Likewise. (clz2): Likewise. (popcount2): Likewise. * config/rs6000/predicates.md (int_reg_operand): Rework tests so that only the GPRs are recognized. * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add support for new power8 builtins. * config/rs6000/rs6000-builtin.def (xscvspdpn): Add new power8 builtin functions. (xscvdpspn): Likewise. (vclz): Likewise. (vclzb): Likewise. 
(vclzh): Likewise. (vclzw): Likewise. (vclzd): Likewise. (vpopcnt): Likewise. (vpopcntb): Likewise. (vpopcnth): Likewise. (vpopcntw): Likewise. (vpopcntd): Likewise. (vgbbd): Likewise. (vmrgew): Likewise. (vmrgow): Likewise. (eqv): Likewise. (eqv_v16qi3): Likewise. (eqv_v8hi3): Likewise. (eqv_v4si3): Likewise. (eqv_v2di3): Likewise. (eqv_v4sf3): Likewise. (eqv_v2df3): Likewise. (nand): Likewise. (nand_v16qi3): Likewise. (nand_v8hi3): Likewise. (nand_v4si3): Likewise. (nand_v2di3): Likewise. (nand_v4sf3): Likewise. (nand_v2df3): Likewise. (orc): Likewise. (orc_v16qi3): Likewise. (orc_v8hi3): Likewise. (orc_v4si3): Likewise. (orc_v2di3): Likewise. (orc_v4sf3): Likewise. (orc_v2df3): Likewise. * config/rs6000/rs6000.c (rs6000_option_override_internal): Only allow power8 quad mode in 64-bit. (rs6000_builtin_vectorized_function): Add support to vectorize ISA 2.07 count leading zeros, population count builtins. (rs6000_expand_vector_init): On ISA 2.07 use xscvdpspn to form V4SF vectors instead of xscvdpsp to avoid IEEE related traps. (builtin_function_type): Add vgbbd builtin function which takes an unsigned argument. (altivec_expand_vec_perm_const): Add support for new power8 merge instructions. * config/rs6000/vsx.md (VSX_L2): New iterator for 128-bit types, that does not include TImode for use with 32-bit. (UNSPEC_VSX_CVSPDPN): Support for power8 xscvdpspn and xscvspdpn instructions. (UNSPEC_VSX_CVDPSPN): Likewise. (vsx_xscvdpspn): Likewise. (vsx_xscvspdpn): Likewise. (vsx_xscvdpspn_scalar): Likewise. (vsx_xscvspdpn_directmove): Likewise. (vsx_and3): Split logical operations into 32-bit and 64-bit. Add support to do logical operations on TImode as well as VSX vector types. Allow logical operations to be done in either VSX registers or in general purpose registers in 64-bit mode. Add splitters if GPRs were used. For AND, add clobber of CCmode to allow use of ANDI on GPRs. Rewrite nor to use the canonical RTL encoding. (vsx_and3_32bit): Likewise. 
(vsx_and3_64bit): Likewise. (vsx_ior3): Likewise. (vsx_ior3_32bit): Likewise. (vsx_ior3_64bit): Likewise. (vsx_xor3): Likewise. (vsx_xor3_32bit): Likewise. (vsx_xor3_64bit): Likewise. (vsx_one_cmpl2): Likewise. (vsx_one_cmpl2_32bit): Likewise. (vsx_one_cmpl2_64bit): Likewise. (vsx_nor3): Likewise. (vsx_nor3_32bit): Likewise. (vsx_nor3_64bit): Likewise. (vsx_andc3): Likewise. (vsx_andc3_32bit): Likewise. (vsx_andc3_64bit): Likewise. (vsx_eqv3_32bit): Add support for power8 xxleqv, xxlnand, and xxlorc instructions. (vsx_eqv3_64bit): Likewise. (vsx_nand3_32bit): Likewise. (vsx_nand3_64bit): Likewise. (vsx_orc3_32bit): Likewise. (vsx_orc3_64bit): Likewise. * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Update comment. * config/rs6000/altivec.md (UNSPEC_VGBBD): Add power8 vgbbd instruction. (p8_vmrgew): Add power8 vmrgew and vmrgow instructions. (p8_vmrgow): Likewise. (altivec_and3): Add clobber of CCmode to allow AND using GPRs to be split under VSX. (p8v_clz2): Add power8 count leading zero support. (p8v_popcount2): Add power8 population count support. (p8v_vgbbd): Add power8 gather bits by bytes by doubleword support. * config/rs6000/rs6000.md (eqv3): Add support for power8 eqv instruction. * config/rs6000/altivec.h (vec_eqv): Add defines to export power8 builtin functions. (vec_nand): Likewise. (vec_vclz): Likewise. (vec_vclzb): Likewise. (vec_vclzd): Likewise. (vec_vclzh): Likewise. (vec_vclzw): Likewise. (vec_vgbbd): Likewise. (vec_vmrgew): Likewise. (vec_vmrgow): Likewise. (vec_vpopcnt): Likewise. (vec_vpopcntb): Likewise. (vec_vpopcntd): Likewise. (vec_vpopcnth): Likewise. (vec_vpopcntw): Likewise. Backport from trunk 2013-05-29 Michael Meissner Pat Haugen Peter Bergner * config/rs6000/vector.md (VEC_I): Add support for new power8 V2DI instructions. (VEC_A): Likewise. (VEC_C): Likewise. (vrotl3): Likewise. (vashl3): Likewise. (vlshr3): Likewise. (vashr3): Likewise. * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add support for power8 V2DI builtins. 
* config/rs6000/rs6000-builtin.def (abs_v2di): Add support for power8 V2DI builtins. (vupkhsw): Likewise. (vupklsw): Likewise. (vaddudm): Likewise. (vminsd): Likewise. (vmaxsd): Likewise. (vminud): Likewise. (vmaxud): Likewise. (vpkudum): Likewise. (vpksdss): Likewise. (vpkudus): Likewise. (vpksdus): Likewise. (vrld): Likewise. (vsld): Likewise. (vsrd): Likewise. (vsrad): Likewise. (vsubudm): Likewise. (vcmpequd): Likewise. (vcmpgtsd): Likewise. (vcmpgtud): Likewise. (vcmpequd_p): Likewise. (vcmpgtsd_p): Likewise. (vcmpgtud_p): Likewise. (vupkhsw): Likewise. (vupklsw): Likewise. (vaddudm): Likewise. (vmaxsd): Likewise. (vmaxud): Likewise. (vminsd): Likewise. (vminud): Likewise. (vpksdss): Likewise. (vpksdus): Likewise. (vpkudum): Likewise. (vpkudus): Likewise. (vrld): Likewise. (vsld): Likewise. (vsrad): Likewise. (vsrd): Likewise. (vsubudm): Likewise. * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Add support for power8 V2DI instructions. * config/rs6000/altivec.md (UNSPEC_VPKUHUM): Add support for power8 V2DI instructions. Combine pack and unpack insns to use an iterator for each mode. Check whether a particular mode supports Altivec instructions instead of just checking TARGET_ALTIVEC. (UNSPEC_VPKUWUM): Likewise. (UNSPEC_VPKSHSS): Likewise. (UNSPEC_VPKSWSS): Likewise. (UNSPEC_VPKUHUS): Likewise. (UNSPEC_VPKSHUS): Likewise. (UNSPEC_VPKUWUS): Likewise. (UNSPEC_VPKSWUS): Likewise. (UNSPEC_VPACK_SIGN_SIGN_SAT): Likewise. (UNSPEC_VPACK_SIGN_UNS_SAT): Likewise. (UNSPEC_VPACK_UNS_UNS_SAT): Likewise. (UNSPEC_VPACK_UNS_UNS_MOD): Likewise. (UNSPEC_VUPKHSB): Likewise. (UNSPEC_VUNPACK_HI_SIGN): Likewise. (UNSPEC_VUNPACK_LO_SIGN): Likewise. (UNSPEC_VUPKHSH): Likewise. (UNSPEC_VUPKLSB): Likewise. (UNSPEC_VUPKLSH): Likewise. (VI2): Likewise. (VI_char): Likewise. (VI_scalar): Likewise. (VI_unit): Likewise. (VP): Likewise. (VP_small): Likewise. (VP_small_lc): Likewise. (VU_char): Likewise. (add3): Likewise. (altivec_vaddcuw): Likewise. (altivec_vaddus): Likewise. 
(altivec_vaddss): Likewise. (sub3): Likewise. (altivec_vsubcuw): Likewise. (altivec_vsubus): Likewise. (altivec_vsubss): Likewise. (altivec_vavgs): Likewise. (altivec_vcmpbfp): Likewise. (altivec_eq): Likewise. (altivec_gt): Likewise. (altivec_gtu): Likewise. (umax3): Likewise. (smax3): Likewise. (umin3): Likewise. (smin3): Likewise. (altivec_vpkuhum): Likewise. (altivec_vpkuwum): Likewise. (altivec_vpkshss): Likewise. (altivec_vpkswss): Likewise. (altivec_vpkuhus): Likewise. (altivec_vpkshus): Likewise. (altivec_vpkuwus): Likewise. (altivec_vpkswus): Likewise. (altivec_vpksss): Likewise. (altivec_vpksus): Likewise. (altivec_vpkuus): Likewise. (altivec_vpkuum): Likewise. (altivec_vrl): Likewise. (altivec_vsl): Likewise. (altivec_vsr): Likewise. (altivec_vsra): Likewise. (altivec_vsldoi_): Likewise. (altivec_vupkhsb): Likewise. (altivec_vupkhs): Likewise. (altivec_vupkls): Likewise. (altivec_vupkhsh): Likewise. (altivec_vupklsb): Likewise. (altivec_vupklsh): Likewise. (altivec_vcmpequ_p): Likewise. (altivec_vcmpgts_p): Likewise. (altivec_vcmpgtu_p): Likewise. (abs2): Likewise. (vec_unpacks_hi_v16qi): Likewise. (vec_unpacks_hi_v8hi): Likewise. (vec_unpacks_lo_v16qi): Likewise. (vec_unpacks_hi_): Likewise. (vec_unpacks_lo_v8hi): Likewise. (vec_unpacks_lo_): Likewise. (vec_pack_trunc_v8h): Likewise. (vec_pack_trunc_v4si): Likewise. (vec_pack_trunc_): Likewise. * config/rs6000/altivec.h (vec_vaddudm): Add defines for power8 V2DI builtins. (vec_vmaxsd): Likewise. (vec_vmaxud): Likewise. (vec_vminsd): Likewise. (vec_vminud): Likewise. (vec_vpksdss): Likewise. (vec_vpksdus): Likewise. (vec_vpkudum): Likewise. (vec_vpkudus): Likewise. (vec_vrld): Likewise. (vec_vsld): Likewise. (vec_vsrad): Likewise. (vec_vsrd): Likewise. (vec_vsubudm): Likewise. (vec_vupkhsw): Likewise. (vec_vupklsw): Likewise. 2013-05-22 Michael Meissner Pat Haugen Peter Bergner * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Add documentation for the power8 crypto builtins. 
* config/rs6000/t-rs6000 (MD_INCLUDES): Add crypto.md. * config/rs6000/rs6000-builtin.def (BU_P8V_AV_1): Add support macros for defining power8 builtin functions. (BU_P8V_AV_2): Likewise. (BU_P8V_AV_P): Likewise. (BU_P8V_VSX_1): Likewise. (BU_P8V_OVERLOAD_1): Likewise. (BU_P8V_OVERLOAD_2): Likewise. (BU_CRYPTO_1): Likewise. (BU_CRYPTO_2): Likewise. (BU_CRYPTO_3): Likewise. (BU_CRYPTO_OVERLOAD_1): Likewise. (BU_CRYPTO_OVERLOAD_2): Likewise. (XSCVSPDP): Fix typo, point to the correct instruction. (VCIPHER): Add power8 crypto builtins. (VCIPHERLAST): Likewise. (VNCIPHER): Likewise. (VNCIPHERLAST): Likewise. (VPMSUMB): Likewise. (VPMSUMH): Likewise. (VPMSUMW): Likewise. (VPERMXOR_V2DI): Likewise. (VPERMXOR_V4SI: Likewise. (VPERMXOR_V8HI: Likewise. (VPERMXOR_V16QI: Likewise. (VSHASIGMAW): Likewise. (VSHASIGMAD): Likewise. (VPMSUM): Likewise. (VPERMXOR): Likewise. (VSHASIGMA): Likewise. * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define __CRYPTO__ if the crypto instructions are available. (altivec_overloaded_builtins): Add support for overloaded power8 builtins. * config/rs6000/rs6000.c (rs6000_expand_ternop_builtin): Add support for power8 crypto builtins. (builtin_function_type): Likewise. (altivec_init_builtins): Add support for builtins that take vector long long (V2DI) arguments. * config/rs6000/crypto.md: New file, define power8 crypto instructions. 2013-05-22 Michael Meissner Pat Haugen Peter Bergner * doc/invoke.texi (Option Summary): Add power8 options. (RS/6000 and PowerPC Options): Likewise. * doc/md.texi (PowerPC and IBM RS6000 constraints): Update to use constraints.md instead of rs6000.h. Reorder w* constraints. Add wm, wn, wr documentation. * gcc/config/rs6000/constraints.md (wm): New constraint for VSX registers if direct move instructions are enabled. (wn): New constraint for no registers. (wq): New constraint for quad word even GPR registers. (wr): New constraint if 64-bit instructions are enabled. 
(wv): New constraint if power8 vector instructions are enabled. (wQ): New constraint for quad word memory locations. * gcc/config/rs6000/predicates.md (const_0_to_15_operand): New constraint for 0..15 for crypto instructions. (gpc_reg_operand): If VSX allow registers in VSX registers as well as GPR and floating point registers. (int_reg_operand): New predicate to match only GPR registers. (base_reg_operand): New predicate to match base registers. (quad_int_reg_operand): New predicate to match even GPR registers for quad memory operations. (vsx_reg_or_cint_operand): New predicate to allow vector logical operations in both GPR and VSX registers. (quad_memory_operand): New predicate for quad memory operations. (reg_or_indexed_operand): New predicate for direct move support. * gcc/config/rs6000/rs6000-cpus.def (ISA_2_5_MASKS_EMBEDDED): Inherit from ISA_2_4_MASKS, not ISA_2_2_MASKS. (ISA_2_7_MASKS_SERVER): New mask for ISA 2.07 (i.e. power8). (POWERPC_MASKS): Add power8 options. (power8 cpu): Use ISA_2_7_MASKS_SERVER instead of specifying the various options. * gcc/config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define _ARCH_PWR8 and __POWER8_VECTOR__ for power8. * gcc/config/rs6000/rs6000.opt (-mvsx-timode): Add documentation. (-mpower8-fusion): New power8 options. (-mpower8-fusion-sign): Likewise. (-mpower8-vector): Likewise. (-mcrypto): Likewise. (-mdirect-move): Likewise. (-mquad-memory): Likewise. * gcc/config/rs6000/rs6000.c (power8_cost): Initial definition for power8. (rs6000_hard_regno_mode_ok): Make PTImode only match even GPR registers. (rs6000_debug_reg_print): Print the base register class if -mdebug=reg. (rs6000_debug_vector_unit): Add p8_vector. (rs6000_debug_reg_global): If -mdebug=reg, print power8 constraint definitions. Also print fusion state. (rs6000_init_hard_regno_mode_ok): Set up power8 constraints. (rs6000_builtin_mask_calculate): Add power8 builtin support. (rs6000_option_override_internal): Add support for power8. 
(rs6000_common_init_builtins): Add debugging for skipped builtins if -mdebug=builtin. (rs6000_adjust_cost): Add power8 support. (rs6000_issue_rate): Likewise. (insn_must_be_first_in_group): Likewise. (insn_must_be_last_in_group): Likewise. (force_new_group): Likewise. (rs6000_register_move_cost): Likewise. (rs6000_opt_masks): Likewise. * config/rs6000/rs6000.h (ASM_CPU_POWER8_SPEC): If we don't have a power8 capable assembler, default to power7 options. (TARGET_DIRECT_MOVE): Likewise. (TARGET_CRYPTO): Likewise. (TARGET_P8_VECTOR): Likewise. (VECTOR_UNIT_P8_VECTOR_P): Define power8 vector support. (VECTOR_UNIT_VSX_OR_P8_VECTOR_P): Likewise. (VECTOR_MEM_P8_VECTOR_P): Likewise. (VECTOR_MEM_VSX_OR_P8_VECTOR_P): Likewise. (VECTOR_MEM_ALTIVEC_OR_VSX_P): Likewise. (TARGET_XSCVDPSPN): Likewise. (TARGET_XSCVSPDPN): Likewise. (TARGET_SYNC_HI_QI): Likewise. (TARGET_SYNC_TI): Likewise. (MASK_CRYPTO): Likewise. (MASK_DIRECT_MOVE): Likewise. (MASK_P8_FUSION): Likewise. (MASK_P8_VECTOR): Likewise. (REG_ALLOC_ORDER): Move fr13 to be lower in priority so that the TFmode temporary used by some of the direct move instructions to get two FP temporary registers does not force creation of a stack frame. (VLOGICAL_REGNO_P): Allow vector logical operations in GPRs. (MODES_TIEABLE_P): Move the VSX tests above the Altivec tests so that any VSX registers are tieable, even if they are also an Altivec vector mode. (r6000_reg_class_enum): Add wm, wr, wv constraints. (RS6000_BTM_P8_VECTOR): Power8 builtin support. (RS6000_BTM_CRYPTO): Likewise. (RS6000_BTM_COMMON): Likewise. * config/rs6000/rs6000.md (cpu attribute): Add power8. * config/rs6000/rs6000-opts.h (PROCESSOR_POWER8): Likewise. (enum rs6000_vector): Add power8 vector support. Backport from mainline 2013-03-20 Pat Haugen * config/rs6000/predicates.md (indexed_address, update_address_mem update_indexed_address_mem): New predicates. * config/rs6000/vsx.md (vsx_extract__zero): Set correct "type" attribute for load/store instructions. 
* config/rs6000/dfp.md (movsd_store): Likewise. (movsd_load): Likewise. * config/rs6000/rs6000.md (zero_extenddi2_internal1): Likewise. (unnamed HI->DI extend define_insn): Likewise. (unnamed SI->DI extend define_insn): Likewise. (unnamed QI->SI extend define_insn): Likewise. (unnamed QI->HI extend define_insn): Likewise. (unnamed HI->SI extend define_insn): Likewise. (unnamed HI->SI extend define_insn): Likewise. (extendsfdf2_fpr): Likewise. (movsi_internal1): Likewise. (movsi_internal1_single): Likewise. (movhi_internal): Likewise. (movqi_internal): Likewise. (movcc_internal1): Correct mnemonic for stw insn. Set correct "type" attribute for load/store instructions. (mov_hardfloat): Set correct "type" attribute for load/store instructions. (mov_softfloat): Likewise. (mov_hardfloat32): Likewise. (mov_hardfloat64): Likewise. (mov_softfloat64): Likewise. (movdi_internal32): Likewise. (movdi_internal64): Likewise. (probe_stack_): Likewise. Backport from mainline 2013-03-20 Michael Meissner * config/rs6000/vector.md (VEC_R): Add 32-bit integer, binary floating point, and decimal floating point to reload iterator. * config/rs6000/constraints.md (wl constraint): New constraints to return FLOAT_REGS if certain options are used to reduce the number of separate patterns that exist in the file. (wx constraint): Likewise. (wz constraint): Likewise. * config/rs6000/rs6000.c (rs6000_debug_reg_global): If -mdebug=reg, print wg, wl, wx, and wz constraints. (rs6000_init_hard_regno_mode_ok): Initialize new constraints. Initialize the reload functions for 64-bit binary/decimal floating point types. (reg_offset_addressing_ok_p): If we are on a power7 or later, use LFIWZX and STFIWX to load/store 32-bit decimal types, and don't create the buffer on the stack to overcome not having a 32-bit load and store. (rs6000_emit_move): Likewise. (rs6000_secondary_memory_needed_rtx): Likewise. (rs6000_alloc_sdmode_stack_slot): Likewise. 
(rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f via xxlxor, just like DFmode 0.0. * config/rs6000/rs6000.h (TARGET_NO_SDMODE_STACK): New macro) (define as 1 if we are running on a power7 or newer. (enum r6000_reg_class_enum): Add new constraints. * config/rs6000/dfp.md (movsd): Delete, combine with binary floating point moves in rs6000.md. Combine power6x (mfpgpr) moves with other moves by using conditional constraits (wg). Use LFIWZX and STFIWX for loading SDmode on power7. Use xxlxor to create 0.0f. (movsd splitter): Likewise. (movsd_hardfloat): Likewise. (movsd_softfloat): Likewise. * config/rs6000/rs6000.md (FMOVE32): New iterators to combine binary and decimal floating point moves. (fmove_ok): New attributes to combine binary and decimal floating point moves, and to combine power6x (mfpgpr) moves along normal floating moves. (real_value_to_target): Likewise. (f32_lr): Likewise. (f32_lm): Likewise. (f32_li): Likewise. (f32_sr): Likewise. (f32_sm): Likewise. (f32_si): Likewise. (movsf): Combine binary and decimal floating point moves. Combine power6x (mfpgpr) moves with other moves by using conditional constraits (wg). Use LFIWZX and STFIWX for loading SDmode on power7. (mov for SFmode/SDmode); Likewise. (SFmode/SDmode splitters): Likewise. (movsf_hardfloat): Likewise. (mov_hardfloat for SFmode/SDmode): Likewise. (movsf_softfloat): Likewise. (mov_softfloat for SFmode/SDmode): Likewise. * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wl) (wx and wz constraints. * config/rs6000/constraints.md (wg constraint): New constraint to return FLOAT_REGS if -mmfpgpr (power6x) was used. * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add wg constraint. * config/rs6000/rs6000.c (rs6000_debug_reg_global): If -mdebug=reg, print wg, wl, wx, and wz constraints. (rs6000_init_hard_regno_mode_ok): Initialize new constraints. Initialize the reload functions for 64-bit binary/decimal floating point types. 
(reg_offset_addressing_ok_p): If we are on a power7 or later, use LFIWZX and STFIWX to load/store 32-bit decimal types, and don't create the buffer on the stack to overcome not having a 32-bit load and store. (rs6000_emit_move): Likewise. (rs6000_secondary_memory_needed_rtx): Likewise. (rs6000_alloc_sdmode_stack_slot): Likewise. (rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f via xxlxor, just like DFmode 0.0. * config/rs6000/dfp.md (movdd): Delete, combine with binary floating point moves in rs6000.md. Combine power6x (mfpgpr) moves with other moves by using conditional constraits (wg). Use LFIWZX and STFIWX for loading SDmode on power7. (movdd splitters): Likewise. (movdd_hardfloat32): Likewise. (movdd_softfloat32): Likewise. (movdd_hardfloat64_mfpgpr): Likewise. (movdd_hardfloat64): Likewise. (movdd_softfloat64): Likewise. * config/rs6000/rs6000.md (FMOVE64): New iterators to combine 64-bit binary and decimal floating point moves. (FMOVE64X): Likewise. (movdf): Combine 64-bit binary and decimal floating point moves. Combine power6x (mfpgpr) moves with other moves by using conditional constraits (wg). (mov for DFmode/DDmode): Likewise. (DFmode/DDmode splitters): Likewise. (movdf_hardfloat32): Likewise. (mov_hardfloat32 for DFmode/DDmode): Likewise. (movdf_softfloat32): Likewise. (movdf_hardfloat64_mfpgpr): Likewise. (movdf_hardfloat64): Likewise. (mov_hardfloat64 for DFmode/DDmode): Likewise. (movdf_softfloat64): Likewise. (mov_softfloat64 for DFmode/DDmode): Likewise. (reload__load): Move to later in the file so they aren't in the middle of the floating point move insns. (reload__store): Likewise. * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wg constraint. * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print out wg constraint if -mdebug=reg. (rs6000_initi_hard_regno_mode_ok): Enable wg constraint if -mfpgpr. Enable using dd reload support if needed. 
* config/rs6000/dfp.md (movtd): Delete, combine with 128-bit binary and decimal floating point moves in rs6000.md. (movtd_internal): Likewise. * config/rs6000/rs6000.md (FMOVE128): Combine 128-bit binary and decimal floating point moves. (movtf): Likewise. (movtf_internal): Likewise. (mov_internal, TDmode/TFmode): Likewise. (movtf_softfloat): Likewise. (mov_softfloat, TDmode/TFmode): Likewise. * config/rs6000/rs6000.md (movdi_mfpgpr): Delete, combine with movdi_internal64, using wg constraint for move direct operations. (movdi_internal64): Likewise. * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print MODES_TIEABLE_P for selected modes. Print the numerical value of the various virtual registers. Use GPR/FPR first/last values) (instead of hard coding the register numbers. Print which modes have reload functions registered. (rs6000_option_override_internal): If -mdebug=reg, trace the options settings before/after setting cpu, target and subtarget settings. (rs6000_secondary_reload_trace): Improve the RTL dump for -mdebug=addr and for secondary reload failures in rs6000_secondary_reload_inner. (rs6000_secondary_reload_fail): Likewise. (rs6000_secondary_reload_inner): Likewise. * config/rs6000/rs6000.md (FIRST_GPR_REGNO): Add convenience macros for first/last GPR and FPR registers. (LAST_GPR_REGNO): Likewise. (FIRST_FPR_REGNO): Likewise. (LAST_FPR_REGNO): Likewise. * config/rs6000/vector.md (mul3): Use the combined macro VECTOR_UNIT_ALTIVEC_OR_VSX_P instead of separate calls to VECTOR_UNIT_ALTIVEC_P and VECTOR_UNIT_VSX_P. (vcond): Likewise. (vcondu): Likewise. (vector_gtu): Likewise. (vector_gte): Likewise. (xor3): Don't allow logical operations on TImode in 32-bit to prevent the compiler from converting DImode operations to TImode. (ior3): Likewise. (and3): Likewise. (one_cmpl2): Likewise. (nor3): Likewise. (andc3): Likewise. * config/rs6000/constraints.md (wt constraint): New constraint that returns VSX_REGS if TImode is allowed in VSX registers. 
* config/rs6000/predicates.md (easy_fp_constant): 0.0f is an easy constant under VSX. * config/rs6000/rs6000-modes.def (PTImode): Define, PTImode is similar to TImode, but it is restricted to being in the GPRs. * config/rs6000/rs6000.opt (-mvsx-timode): New switch to allow TImode to occupy a single VSX register. * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Default to -mvsx-timode for power7/power8. (power7 cpu): Likewise. (power8 cpu): Likewise. * config/rs6000/rs6000.c (rs6000_hard_regno_nregs_internal): Make sure that TFmode/TDmode take up two registers if they are ever allowed in the upper VSX registers. (rs6000_hard_regno_mode_ok): If -mvsx-timode, allow TImode in VSX registers. (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_debug_reg_global): Add debugging for PTImode and wt constraint. Print if LRA is turned on. (rs6000_option_override_internal): Give an error if -mvsx-timode and VSX is not enabled. (invalid_e500_subreg): Handle PTImode, restricting it to GPRs. If -mvsx-timode, restrict TImode to reg+reg addressing, and PTImode to reg+offset addressing. Use PTImode when checking offset addresses for validity. (reg_offset_addressing_ok_p): Likewise. (rs6000_legitimate_offset_address_p): Likewise. (rs6000_legitimize_address): Likewise. (rs6000_legitimize_reload_address): Likewise. (rs6000_legitimate_address_p): Likewise. (rs6000_eliminate_indexed_memrefs): Likewise. (rs6000_emit_move): Likewise. (rs6000_secondary_reload): Likewise. (rs6000_secondary_reload_inner): Handle PTImode. Allow 64-bit reloads to fpr registers to continue to use reg+offset addressing, but 64-bit reloads to altivec registers need reg+reg addressing. Drop test for PRE_MODIFY, since VSX loads/stores no longer support it. Treat LO_SUM like a PLUS operation. (rs6000_secondary_reload_class): If type is 64-bit, prefer to use FLOAT_REGS instead of VSX_REGS to allow use of reg+offset addressing. 
(rs6000_cannot_change_mode_class): Do not allow TImode in VSX registers to share a register with a smaller sized type, since VSX puts scalars in the upper 64-bits. (print_operand): Add support for PTImode. (rs6000_register_move_cost): Use VECTOR_MEM_VSX_P instead of VECTOR_UNIT_VSX_P to catch types that can be loaded in VSX registers, but don't have arithmetic support. (rs6000_memory_move_cost): Add test for VSX. (rs6000_opt_masks): Add -mvsx-timode. * config/rs6000/vsx.md (VSm): Change to use 64-bit aligned moves for TImode. (VSs): Likewise. (VSr): Use wt constraint for TImode. (VSv): Drop TImode support. (vsx_movti): Delete, replace with versions for 32-bit and 64-bit. (vsx_movti_64bit): Likewise. (vsx_movti_32bit): Likewise. (vec_store_): Use VSX iterator instead of vector iterator. (vsx_and3): Delete use of '?' constraint on inputs, just put one '?' on the appropriate output constraint. Do not allow TImode logical operations on 32-bit systems. (vsx_ior3): Likewise. (vsx_xor3): Likewise. (vsx_one_cmpl2): Likewise. (vsx_nor3): Likewise. (vsx_andc3): Likewise. (vsx_concat_): Likewise. (vsx_xxpermdi_): Fix thinko for non V2DF/V2DI modes. * config/rs6000/rs6000.h (MASK_VSX_TIMODE): Map from OPTION_MASK_VSX_TIMODE. (enum rs6000_reg_class_enum): Add RS6000_CONSTRAINT_wt. (STACK_SAVEAREA_MODE): Use PTImode instead of TImode. * config/rs6000/rs6000.md (INT mode attribute): Add PTImode. (TI2 iterator): New iterator for TImode, PTImode. (wd mode attribute): Add values for vector types. (movti_string): Replace TI move operations with operations for TImode and PTImode. Add support for TImode being allowed in VSX registers. (mov_string, TImode/PTImode): Likewise. (movti_ppc64): Likewise. (mov_ppc64, TImode/PTImode): Likewise. (TI mode splitters): Likewise. * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wt constraint. 
[gcc/testsuite] 2014-04-04 Bill Schmidt Backport from mainline 2013-11-22 Michael Meissner PR target/59054 * gcc.target/powerpc/direct-move.h (VSX_REG_ATTR): Allow test to specify an appropriate register class for VSX operations. (load_vsx): Use it. (load_gpr_to_vsx): Likewise. (load_vsx_to_gpr): Likewise. * gcc.target/powerpc/direct-move-vint1.c: Use an appropriate register class for VSX registers that the type can handle. Remove checks for explicit number of instructions generated, just check if the instruction is generated. * gcc.target/powerpc/direct-move-vint2.c: Likewise. * gcc.target/powerpc/direct-move-float1.c: Likewise. * gcc.target/powerpc/direct-move-float2.c: Likewise. * gcc.target/powerpc/direct-move-double1.c: Likewise. * gcc.target/powerpc/direct-move-double2.c: Likewise. * gcc.target/powerpc/direct-move-long1.c: Likewise. * gcc.target/powerpc/direct-move-long2.c: Likewise. * gcc.target/powerpc/bool3-av.c: Limit to 64-bit mode for now. * gcc.target/powerpc/bool3-p7.c: Likewise. * gcc.target/powerpc/bool3-p8.c: Likewise. * gcc.target/powerpc/p8vector-ldst.c: Just check that the appropriate instructions are generated, don't check the count. 2013-11-12 Michael Meissner PR target/59054 * gcc.target/powerpc/pr59054.c: New test. 2013-08-22 Michael Meissner * gcc.target/powerpc/pr57744.c: Declare abort. 2013-07-18 Pat Haugen * gcc.target/powerpc/pr57744.c: Fix typo. Back port from mainline 2013-10-03 Michael Meissner * gcc.target/powerpc/p8vector-fp.c: New test for floating point scalar operations when using -mupper-regs-sf and -mupper-regs-df. * gcc.target/powerpc/ppc-target-1.c: Update tests to allow either VSX scalar operations or the traditional floating point form of the instruction. * gcc.target/powerpc/ppc-target-2.c: Likewise. * gcc.target/powerpc/recip-3.c: Likewise. * gcc.target/powerpc/recip-5.c: Likewise. * gcc.target/powerpc/pr72747.c: Likewise. * gcc.target/powerpc/vsx-builtin-3.c: Likewise. 
Back port from mainline 2013-09-27 Michael Meissner * gcc.target/powerpc/p8vector-ldst.c: New test for -mupper-regs-sf and -mupper-regs-df. Back port from mainline 2013-10-17 Michael Meissner PR target/58673 * gcc.target/powerpc/pr58673-1.c: New file to test whether -mquad-word + -mno-vsx-timode causes errors. * gcc.target/powerpc/pr58673-2.c: Likewise. Backport from trunk. 2013-07-23 Michael Meissner * gcc.target/powerpc/bool2.h: New file, test the code generation of logical operations for power5, altivec, power7, and power8 systems. * gcc.target/powerpc/bool2-p5.c: Likewise. * gcc.target/powerpc/bool2-av.c: Likewise. * gcc.target/powerpc/bool2-p7.c: Likewise. * gcc.target/powerpc/bool2-p8.c: Likewise. * gcc.target/powerpc/bool3.h: Likewise. * gcc.target/powerpc/bool3-av.c: Likewise. * gcc.target/powerpc/bool2-p7.c: Likewise. * gcc.target/powerpc/bool2-p8.c: Likewise. Backport from trunk. 2013-07-31 Michael Meissner * gcc.target/powerpc/fusion.c: New file, test power8 fusion support. Back port from the trunk 2013-06-28 Michael Meissner PR target/57744 * gcc.target/powerpc/pr57744.c: New test to make sure lqarx and stqcx. get even registers. Back port from the trunk 2013-06-12 Michael Meissner Pat Haugen Peter Bergner * gcc.target/powerpc/atomic-p7.c: New file, add tests for atomic load/store instructions on power7, power8. * gcc.target/powerpc/atomic-p8.c: Likewise. Back port from the trunk 2013-06-10 Michael Meissner Pat Haugen Peter Bergner * gcc.target/powerpc/direct-move-vint1.c: New tests for power8 direct move instructions. * gcc.target/powerpc/direct-move-vint2.c: Likewise. * gcc.target/powerpc/direct-move.h: Likewise. * gcc.target/powerpc/direct-move-float1.c: Likewise. * gcc.target/powerpc/direct-move-float2.c: Likewise. * gcc.target/powerpc/direct-move-double1.c: Likewise. * gcc.target/powerpc/direct-move-double2.c: Likewise. * gcc.target/powerpc/direct-move-long1.c: Likewise. * gcc.target/powerpc/direct-move-long2.c: Likewise. 
Backport from the trunk 2013-06-06 Michael Meissner Pat Haugen Peter Bergner * gcc.target/powerpc/p8vector-builtin-1.c: New test to test power8 builtin functions. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-2.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-3.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-4.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-5.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-6.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-7.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-1.c: New tests to test power8 auto-vectorization. * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-2.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-3.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-4.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-5.c: Likewise. * gcc.target/powerpc/crypto-builtin-1.c: Use effective target powerpc_p8vector_ok instead of powerpc_vsx_ok. * gcc.target/powerpc/bool.c: New file, add eqv, nand, nor tests. * lib/target-supports.exp (check_p8vector_hw_available) Add power8 support. (check_effective_target_powerpc_p8vector_ok): Likewise. (is-effective-target): Likewise. (check_vect_support_and_set_flags): Likewise. Backport from trunk 2013-05-22 Michael Meissner Pat Haugen Peter Bergner * gcc.target/powerpc/crypto-builtin-1.c: New file, test for power8 crypto builtins. Backport from mainline 2013-03-20 Michael Meissner * gcc.target/powerpc/mmfpgpr.c: New test. * gcc.target/powerpc/sd-vsx.c: Likewise. * gcc.target/powerpc/sd-pwr6.c: Likewise. * gcc.target/powerpc/vsx-float0.c: Likewise. 
From-SVN: r209087 --- gcc/ChangeLog | 1438 ++++++++ gcc/config/rs6000/altivec.h | 36 + gcc/config/rs6000/altivec.md | 554 ++-- gcc/config/rs6000/constraints.md | 58 +- gcc/config/rs6000/crypto.md | 101 + gcc/config/rs6000/dfp.md | 315 +- gcc/config/rs6000/driver-rs6000.c | 2 +- gcc/config/rs6000/power8.md | 373 +++ gcc/config/rs6000/predicates.md | 253 ++ gcc/config/rs6000/rs6000-builtin.def | 245 +- gcc/config/rs6000/rs6000-c.c | 573 +++- gcc/config/rs6000/rs6000-cpus.def | 25 +- gcc/config/rs6000/rs6000-modes.def | 5 + gcc/config/rs6000/rs6000-opts.h | 8 +- gcc/config/rs6000/rs6000-protos.h | 7 + gcc/config/rs6000/rs6000.c | 2877 +++++++++++++++-- gcc/config/rs6000/rs6000.h | 139 +- gcc/config/rs6000/rs6000.md | 2627 ++++++++++----- gcc/config/rs6000/rs6000.opt | 47 +- gcc/config/rs6000/sync.md | 166 +- gcc/config/rs6000/t-rs6000 | 2 + gcc/config/rs6000/vector.md | 83 +- gcc/config/rs6000/vsx.md | 517 ++- gcc/doc/extend.texi | 335 ++ gcc/doc/invoke.texi | 49 +- gcc/doc/md.texi | 52 +- gcc/testsuite/ChangeLog | 175 + gcc/testsuite/gcc.target/powerpc/atomic-p7.c | 207 ++ gcc/testsuite/gcc.target/powerpc/atomic-p8.c | 237 ++ gcc/testsuite/gcc.target/powerpc/bool.c | 14 + gcc/testsuite/gcc.target/powerpc/bool2-av.c | 32 + gcc/testsuite/gcc.target/powerpc/bool2-p5.c | 32 + gcc/testsuite/gcc.target/powerpc/bool2-p7.c | 31 + gcc/testsuite/gcc.target/powerpc/bool2-p8.c | 32 + gcc/testsuite/gcc.target/powerpc/bool2.h | 29 + gcc/testsuite/gcc.target/powerpc/bool3-av.c | 37 + gcc/testsuite/gcc.target/powerpc/bool3-p7.c | 37 + gcc/testsuite/gcc.target/powerpc/bool3-p8.c | 36 + gcc/testsuite/gcc.target/powerpc/bool3.h | 186 ++ .../gcc.target/powerpc/crypto-builtin-1.c | 130 + .../gcc.target/powerpc/direct-move-double1.c | 16 + .../gcc.target/powerpc/direct-move-double2.c | 15 + .../gcc.target/powerpc/direct-move-float1.c | 18 + .../gcc.target/powerpc/direct-move-float2.c | 15 + .../gcc.target/powerpc/direct-move-long1.c | 16 + .../gcc.target/powerpc/direct-move-long2.c | 15 + 
.../gcc.target/powerpc/direct-move-vint1.c | 14 + .../gcc.target/powerpc/direct-move-vint2.c | 13 + .../gcc.target/powerpc/direct-move.h | 187 ++ gcc/testsuite/gcc.target/powerpc/fusion.c | 23 + gcc/testsuite/gcc.target/powerpc/mmfpgpr.c | 22 + .../gcc.target/powerpc/p8vector-builtin-1.c | 65 + .../gcc.target/powerpc/p8vector-builtin-2.c | 204 ++ .../gcc.target/powerpc/p8vector-builtin-3.c | 104 + .../gcc.target/powerpc/p8vector-builtin-4.c | 249 ++ .../gcc.target/powerpc/p8vector-builtin-5.c | 105 + .../gcc.target/powerpc/p8vector-builtin-6.c | 10 + .../gcc.target/powerpc/p8vector-builtin-7.c | 32 + .../gcc.target/powerpc/p8vector-fp.c | 139 + .../gcc.target/powerpc/p8vector-ldst.c | 42 + .../gcc.target/powerpc/p8vector-vectorize-1.c | 200 ++ .../gcc.target/powerpc/p8vector-vectorize-2.c | 30 + .../gcc.target/powerpc/p8vector-vectorize-3.c | 29 + .../gcc.target/powerpc/p8vector-vectorize-4.c | 69 + .../gcc.target/powerpc/p8vector-vectorize-5.c | 87 + .../gcc.target/powerpc/ppc-target-1.c | 3 +- .../gcc.target/powerpc/ppc-target-2.c | 3 +- gcc/testsuite/gcc.target/powerpc/pr42747.c | 2 +- gcc/testsuite/gcc.target/powerpc/pr57744.c | 39 + gcc/testsuite/gcc.target/powerpc/pr58673-1.c | 78 + gcc/testsuite/gcc.target/powerpc/pr58673-2.c | 217 ++ gcc/testsuite/gcc.target/powerpc/pr59054.c | 9 + gcc/testsuite/gcc.target/powerpc/recip-3.c | 12 +- gcc/testsuite/gcc.target/powerpc/recip-5.c | 4 +- gcc/testsuite/gcc.target/powerpc/sd-pwr6.c | 19 + gcc/testsuite/gcc.target/powerpc/sd-vsx.c | 20 + .../gcc.target/powerpc/vsx-builtin-3.c | 6 +- gcc/testsuite/gcc.target/powerpc/vsx-float0.c | 16 + gcc/testsuite/lib/target-supports.exp | 59 +- 79 files changed, 12101 insertions(+), 2207 deletions(-) create mode 100644 gcc/config/rs6000/crypto.md create mode 100644 gcc/config/rs6000/power8.md create mode 100644 gcc/testsuite/gcc.target/powerpc/atomic-p7.c create mode 100644 gcc/testsuite/gcc.target/powerpc/atomic-p8.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool.c 
create mode 100644 gcc/testsuite/gcc.target/powerpc/bool2-av.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool2-p5.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool2-p7.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool2-p8.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool2.h create mode 100644 gcc/testsuite/gcc.target/powerpc/bool3-av.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool3-p7.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool3-p8.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool3.h create mode 100644 gcc/testsuite/gcc.target/powerpc/crypto-builtin-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-double1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-double2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-float1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-float2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-long1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-long2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move.h create mode 100644 gcc/testsuite/gcc.target/powerpc/fusion.c create mode 100644 gcc/testsuite/gcc.target/powerpc/mmfpgpr.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-5.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-6.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-7.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-fp.c create mode 100644 
gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-5.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr57744.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr58673-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr58673-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr59054.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sd-pwr6.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sd-vsx.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-float0.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 700a17972837..d9b041af5719 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,1441 @@ +2014-04-04 Bill Schmidt + + Power8 Base Support + Apply mainline + 2013-11-23 Alan Modra + * config/rs6000/vsx.md (fusion peepholes): Disable when !TARGET_VSX. + + Backport from mainline + 2013-11-12 Michael Meissner + + PR target/59054 + * config/rs6000/rs6000.md (movdi_internal32): Eliminate + constraints that would allow DImode into the traditional Altivec + registers, but cause undesirable code generation when loading 0 as + a constant. + (movdi_internal64): Likewise. + (cmp_fpr): Do not use %x for CR register output. + (extendsfdf2_fpr): Fix constraints when -mallow-upper-df and + -mallow-upper-sf debug switches are used. + + Backport from mainline + 2013-10-17 Michael Meissner + + * config/rs6000/rs6000.c (enum rs6000_reload_reg_type): Add new + fields to the reg_addr array that describes the valid addressing + mode for any register, general purpose registers, floating point + registers, and Altivec registers. + (FIRST_RELOAD_REG_CLASS): Likewise. 
+ (LAST_RELOAD_REG_CLASS): Likewise. + (struct reload_reg_map_type): Likewise. + (reload_reg_map_type): Likewise. + (RELOAD_REG_VALID): Likewise. + (RELOAD_REG_MULTIPLE): Likewise. + (RELOAD_REG_INDEXED): Likewise. + (RELOAD_REG_OFFSET): Likewise. + (RELOAD_REG_PRE_INCDEC): Likewise. + (RELOAD_REG_PRE_MODIFY): Likewise. + (reg_addr): Likewise. + (mode_supports_pre_incdec_p): New helper functions to say whether + a given mode supports PRE_INC, PRE_DEC, and PRE_MODIFY. + (mode_supports_pre_modify_p): Likewise. + (rs6000_debug_vector_unit): Rearrange the -mdebug=reg output to + print the valid address mode bits for each mode. + (rs6000_debug_print_mode): Likewise. + (rs6000_debug_reg_global): Likewise. + (rs6000_setup_reg_addr_masks): New function to set up the address + mask bits for each type. + (rs6000_init_hard_regno_mode_ok): Use memset to clear arrays. + Call rs6000_setup_reg_addr_masks to set up the address mask bits. + (rs6000_legitimate_address_p): Use mode_supports_pre_incdec_p and + mode_supports_pre_modify_p to determine if PRE_INC, PRE_DEC, and + PRE_MODIFY are supported. + (rs6000_output_move_128bit): Change to use {src,dest}_vmx_p for altivec + registers, instead of {src,dest}_av_p. + (rs6000_print_options_internal): Tweak the debug output slightly. + + Backport from mainline + 2013-10-03 Michael Meissner + + * config/rs6000/rs6000-builtin.def (XSRDPIM): Use floatdf2, + ceildf2, btruncdf2, instead of vsx_* name. + + * config/rs6000/vsx.md (vsx_add3): Change arithmetic + iterators to only do V2DF and V4SF here. Move the DF code to + rs6000.md where it is combined with SF mode. Replace with + just 'v' since only vector operations are handled with these insns + after moving the DF support to rs6000.md. + (vsx_sub3): Likewise. + (vsx_mul3): Likewise. + (vsx_div3): Likewise. + (vsx_fre2): Likewise. + (vsx_neg2): Likewise. + (vsx_abs2): Likewise. + (vsx_nabs2): Likewise. + (vsx_smax3): Likewise. + (vsx_smin3): Likewise. + (vsx_sqrt2): Likewise. 
+ (vsx_rsqrte2): Likewise. + (vsx_fms4): Likewise. + (vsx_nfma4): Likewise. + (vsx_copysign3): Likewise. + (vsx_btrunc2): Likewise. + (vsx_floor2): Likewise. + (vsx_ceil2): Likewise. + (vsx_smaxsf3): Delete scalar ops that were moved to rs6000.md. + (vsx_sminsf3): Likewise. + (vsx_fmadf4): Likewise. + (vsx_fmsdf4): Likewise. + (vsx_nfmadf4): Likewise. + (vsx_nfmsdf4): Likewise. + (vsx_cmpdf_internal1): Likewise. + + * config/rs6000/rs6000.h (TARGET_SF_SPE): Define macros to make it + simpler to select whether a target has SPE or traditional floating + point support in iterators. + (TARGET_DF_SPE): Likewise. + (TARGET_SF_FPR): Likewise. + (TARGET_DF_FPR): Likewise. + (TARGET_SF_INSN): Macros to say whether floating point support + exists for a given operation for expanders. + (TARGET_DF_INSN): Likewise. + + * config/rs6000/rs6000.c (Ftrad): New mode attributes to allow + combining of SF/DF mode operations, using both traditional and VSX + registers. + (Fvsx): Likewise. + (Ff): Likewise. + (Fv): Likewise. + (Fs): Likewise. + (Ffre): Likewise. + (FFRE): Likewise. + (abs2): Combine SF/DF modes using traditional floating point + instructions. Add support for using the upper DF registers with + VSX support, and SF registers with power8-vector support. Update + expanders for operations supported by both the SPE and traditional + floating point units. + (abs2_fpr): Likewise. + (nabs2): Likewise. + (nabs2_fpr): Likewise. + (neg2): Likewise. + (neg2_fpr): Likewise. + (add3): Likewise. + (add3_fpr): Likewise. + (sub3): Likewise. + (sub3_fpr): Likewise. + (mul3): Likewise. + (mul3_fpr): Likewise. + (div3): Likewise. + (div3_fpr): Likewise. + (sqrt3): Likewise. + (sqrt3_fpr): Likewise. + (fre): Likewise. + (rsqrt2): Likewise. + (cmp_fpr): Likewise. + (smax3): Likewise. + (smin3): Likewise. + (smax3_vsx): Likewise. + (smin3_vsx): Likewise. + (negsf2): Delete SF operations that are merged with DF. + (abssf2): Likewise. + (addsf3): Likewise. + (subsf3): Likewise. 
+ (mulsf3): Likewise. + (divsf3): Likewise. + (fres): Likewise. + (fmasf4_fpr): Likewise. + (fmssf4_fpr): Likewise. + (nfmasf4_fpr): Likewise. + (nfmssf4_fpr): Likewise. + (sqrtsf2): Likewise. + (rsqrtsf_internal1): Likewise. + (smaxsf3): Likewise. + (sminsf3): Likewise. + (cmpsf_internal1): Likewise. + (copysign3_fcpsgn): Add VSX/power8-vector support. + (negdf2): Delete DF operations that are merged with SF. + (absdf2): Likewise. + (nabsdf2): Likewise. + (adddf3): Likewise. + (subdf3): Likewise. + (muldf3): Likewise. + (divdf3): Likewise. + (fred): Likewise. + (rsqrtdf_internal1): Likewise. + (fmadf4_fpr): Likewise. + (fmsdf4_fpr): Likewise. + (nfmadf4_fpr): Likewise. + (nfmsdf4_fpr): Likewise. + (sqrtdf2): Likewise. + (smaxdf3): Likewise. + (smindf3): Likewise. + (cmpdf_internal1): Likewise. + (lrintdi2): Use TARGET__FPR macro. + (btrunc2): Delete separate expander, and combine with the + insn and add VSX instruction support. Use TARGET__FPR. + (btrunc2_fpr): Likewise. + (ceil2): Likewise. + (ceil2_fpr): Likewise. + (floor2): Likewise. + (floor2_fpr): Likewise. + (fma4_fpr): Combine SF and DF fused multiply/add support. + Add support for using the upper registers with VSX and + power8-vector. Move insns to be closer to the define_expands. On + VSX systems, prefer the traditional form of FMA over the VSX + version, since the traditional form allows the target not to + overlap with the inputs. + (fms4_fpr): Likewise. + (nfma4_fpr): Likewise. + (nfms4_fpr): Likewise. + + Backport from mainline + 2013-09-27 Michael Meissner + + * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Allow + DFmode, DImode, and SFmode in the upper VSX registers based on the + -mupper-regs-{df,sf} flags. Fix wu constraint to be ALTIVEC_REGS + if -mpower8-vector. Combine -mvsx-timode handling with the rest + of the VSX register handling. + + * config/rs6000/rs6000.md (f32_lv): Use %x0 for VSX regsters. + (f32_sv): Likewise. 
+ (zero_extendsidi2_lfiwzx): Add support for loading into the + Altivec registers with -mpower8-vector. Use wu/wv constraints to + only do VSX memory options on Altivec registers. + (extendsidi2_lfiwax): Likewise. + (extendsfdf2_fpr): Likewise. + (mov_hardfloat, SF/SD modes): Likewise. + (mov_hardfloat32, DF/DD modes): Likewise. + (mov_hardfloat64, DF/DD modes): Likewise. + (movdi_internal64): Likewise. + + Backport from mainline + 2013-09-23 Michael Meissner + + * config/rs6000/rs6000.c (rs6000_vector_reload): Delete, combine + reload helper function arrays into a single array reg_addr. + (reload_fpr_gpr): Likewise. + (reload_gpr_vsx): Likewise. + (reload_vsx_gpr): Likewise. + (struct rs6000_reg_addr): Likewise. + (reg_addr): Likewise. + (rs6000_debug_reg_global): Change rs6000_vector_reload, + reload_fpr_gpr, reload_gpr_vsx, reload_vsx_gpr uses to reg_addr. + (rs6000_init_hard_regno_mode_ok): Likewise. + (rs6000_secondary_reload_direct_move): Likewise. + (rs6000_secondary_reload): Likewise. + + * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add new + constraints: wu, ww, and wy. Repurpose wv constraint added during + power8 changes. Put wg constraint in alphabetical order. + + * config/rs6000/rs6000.opt (-mvsx-scalar-float): New debug switch + for future work to add ISA 2.07 VSX single precision support. + (-mvsx-scalar-double): Change default from -1 to 1, update + documentation comment. + (-mvsx-scalar-memory): Rename debug switch to -mupper-regs-df. + (-mupper-regs-df): New debug switch to control whether DF values + can go in the traditional Altivec registers. + (-mupper-regs-sf): New debug switch to control whether SF values + can go in the traditional Altivec registers. + + * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print wu, ww, + and wy constraints. + (rs6000_init_hard_regno_mode_ok): Use ssize_t instead of int for + loop variables. Rename -mvsx-scalar-memory to -mupper-regs-df. + Add new constraints, wu/ww/wy. Repurpose wv constraint. 
+ (rs6000_debug_legitimate_address_p): Print if we are running + before, during, or after reload. + (rs6000_secondary_reload): Add a comment. + (rs6000_opt_masks): Add -mupper-regs-df, -mupper-regs-sf. + + * config/rs6000/constraints.md (wa constraint): Sort w + constraints. Update documentation string. + (wd constraint): Likewise. + (wf constraint): Likewise. + (wg constraint): Likewise. + (wn constraint): Likewise. + (ws constraint): Likewise. + (wt constraint): Likewise. + (wx constraint): Likewise. + (wz constraint): Likewise. + (wu constraint): New constraint for ISA 2.07 SFmode scalar + instructions. + (ww constraint): Likewise. + (wy constraint): Likewise. + (wv constraint): Repurpose ISA 2.07 constraint that did not use in + the previous submissions. + * doc/md.texi (PowerPC and IBM RS6000): Likewise. + + Backport from mainline + 2013-10-17 Michael Meissner + + PR target/58673 + * config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only + restrict TImode addresses to single indirect registers if both + -mquad-memory and -mvsx-timode are used. + (rs6000_output_move_128bit): Use quad_load_store_p to determine if + we should emit load/store quad. Remove using %y for quad memory + addresses. + + * config/rs6000/rs6000.md (mov_ppc64, TI/PTImode): Add + constraints to allow load/store quad on machines where TImode is + not allowed in VSX registers. Use 'n' instead of 'F' constraint + for TImode to load integer constants. + + Backport from mainline + 2013-10-02 Michael Meissner + + PR target/58587 + * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Turn off + setting -mvsx-timode by default until the underlying problem is + fixed. + (RS6000_CPU, power7 defaults): Likewise. + + Backport from trunk + 2013-08-16 Michael Meissner + + PR target/58160 + * config/rs6000/predicates.md (fusion_gpr_mem_load): Allow the + memory rtx to contain ZERO_EXTEND and SIGN_EXTEND. 
+ + * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Pass operands + array instead of each individual operand as a separate argument. + (emit_fusion_gpr_load): Likewise. + (expand_fusion_gpr_load): Add new function declaration. + + * config/rs6000/rs6000.c (fusion_gpr_load_p): Change the calling + signature to have the operands passed as an array, instead of as + separate arguments. Allow ZERO_EXTEND to be in the memory + address, and also SIGN_EXTEND if -mpower8-fusion-sign. Do not + depend on the register live/dead flags when peepholes are run. + (expand_fusion_gpr_load): New function to be called from the + peephole2 pass, to change the register that addis sets to be the + target register. + (emit_fusion_gpr_load): Change the calling signature to have the + operands passed as an array, instead of as separate arguments. + Allow ZERO_EXTEND to be in the memory address, and also + SIGN_EXTEND if -mpower8-fusion-sign. + + * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): Delete unused + unspec enumeration. + (power8 fusion peephole/peephole2): Rework the fusion peepholes to + adjust the register addis loads up in the peephole2 pass. Do not + depend on the register live/dead state when the peephole pass is + done. + + Backport from trunk + 2013-07-23 Michael Meissner + + * config/rs6000/vector.md (xor3): Move 128-bit boolean + expanders to rs6000.md. + (ior3): Likewise. + (and3): Likewise. + (one_cmpl2): Likewise. + (nor3): Likewise. + (andc3): Likewise. + (eqv3): Likewise. + (nand3): Likewise. + (orc3): Likewise. + + * config/rs6000/rs6000-protos.h (rs6000_split_logical): New + declaration. + + * config/rs6000/rs6000.c (rs6000_split_logical_inner): Add support + to split multi-word logical operations. + (rs6000_split_logical_di): Likewise. + (rs6000_split_logical): Likewise. + + * config/rs6000/vsx.md (VSX_L2): Delete, no longer used. + (vsx_and3_32bit): Move 128-bit logical insns to rs6000.md, + and allow TImode operations in 32-bit. + (vsx_and3_64bit): Likewise. 
+ (vsx_ior3_32bit): Likewise. + (vsx_ior3_64bit): Likewise. + (vsx_xor3_32bit): Likewise. + (vsx_xor3_64bit): Likewise. + (vsx_one_cmpl2_32bit): Likewise. + (vsx_one_cmpl2_64bit): Likewise. + (vsx_nor3_32bit): Likewise. + (vsx_nor3_64bit): Likewise. + (vsx_andc3_32bit): Likewise. + (vsx_andc3_64bit): Likewise. + (vsx_eqv3_32bit): Likewise. + (vsx_eqv3_64bit): Likewise. + (vsx_nand3_32bit): Likewise. + (vsx_nand3_64bit): Likewise. + (vsx_orc3_32bit): Likewise. + (vsx_orc3_64bit): Likewise. + + * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Always allow vector + logical types in GPRs. + + * config/rs6000/altivec.md (altivec_and3): Move 128-bit + logical insns to rs6000.md, and allow TImode operations in + 32-bit. + (altivec_ior3): Likewise. + (altivec_xor3): Likewise. + (altivec_one_cmpl2): Likewise. + (altivec_nor3): Likewise. + (altivec_andc3): Likewise. + + * config/rs6000/rs6000.md (BOOL_128): New mode iterators and mode + attributes for moving the 128-bit logical operations into + rs6000.md. + (BOOL_REGS_OUTPUT): Likewise. + (BOOL_REGS_OP1): Likewise. + (BOOL_REGS_OP2): Likewise. + (BOOL_REGS_UNARY): Likewise. + (BOOL_REGS_AND_CR0): Likewise. + (one_cmpl2): Add support for DI logical operations on + 32-bit, splitting the operations to 32-bit. + (anddi3): Likewise. + (iordi3): Likewise. + (xordi3): Likewise. + (and3, 128-bit types): Rewrite 2013-06-06 logical operator + changes to combine the 32/64-bit code, allow logical operations on + TI mode in 32-bit, and to use similar match_operator patterns like + scalar mode uses. Combine the Altivec and VSX code for logical + operations, and move it here. + (ior3, 128-bit types): Likewise. + (xor3, 128-bit types): Likewise. + (one_cmpl3, 128-bit types): Likewise. + (nor3, 128-bit types): Likewise. + (andc3, 128-bit types): Likewise. + (eqv3, 128-bit types): Likewise. + (nand3, 128-bit types): Likewise. + (orc3, 128-bit types): Likewise. + (and3_internal): Likewise. + (bool3_internal): Likewise. 
+ (boolc3_internal1): Likewise. + (boolc3_internal2): Likewise. + (boolcc3_internal1): Likewise. + (boolcc3_internal2): Likewise. + (eqv3_internal1): Likewise. + (eqv3_internal2): Likewise. + (one_cmpl13_internal): Likewise. + + Back port from mainline: + 2013-06-06 Michael Meissner + Pat Haugen + Peter Bergner + + * lib/target-supports.exp (check_p8vector_hw_available): Add power8 + support. + (check_effective_target_powerpc_p8vector_ok): Likewise. + (is-effective-target): Likewise. + (check_vect_support_and_set_flags): Likewise. + + Backport from mainline + 2013-07-31 Michael Meissner + + * config/rs6000/predicates.md (fusion_gpr_addis): New predicates + to support power8 load fusion. + (fusion_gpr_mem_load): Likewise. + + * config/rs6000/rs6000-modes.def (PTImode): Update a comment. + + * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New + declarations for power8 load fusion. + (emit_fusion_gpr_load): Likewise. + + * config/rs6000/rs6000.c (rs6000_option_override_internal): If + tuning for power8, turn on fusion mode by default. Turn on sign + extending fusion mode if normal fusion mode is on, and we are at + -O2 or -O3. + (fusion_gpr_load_p): New function, return true if we can fuse an + addis instruction with a dependent load to a GPR. + (emit_fusion_gpr_load): Emit the instructions for power8 load + fusion to GPRs. + + * config/rs6000/vsx.md (VSX_M2): New iterator for fusion + peepholes. + (VSX load fusion peepholes): New peepholes to fuse together an + addi instruction with a VSX load instruction. + + * config/rs6000/rs6000.md (GPR load fusion peepholes): New + peepholes to fuse an addis instruction with a load to a GPR base + register. If we are supporting sign extending fusions, convert + sign extending loads to zero extending loads and add an explicit + sign extension. + + Backport from mainline + 2013-07-18 Pat Haugen + + * config/rs6000/rs6000.c (rs6000_option_override_internal): Adjust flag + interaction for new Power8 flags and VSX.
+ + Back port from the trunk + 2013-06-28 Michael Meissner + + PR target/57744 + * config/rs6000/rs6000.h (MODES_TIEABLE_P): Do not allow PTImode + to tie with any other modes. Eliminate Altivec vector mode tests, + since these are a subset of ALTIVEC or VSX vector modes. Simplify + code, to return 0 if testing MODE2 for a condition, if we've + already tested MODE1 for the same condition. + + Backport from mainline + 2013-06-28 Pat Haugen + + * config/rs6000/rs6000.md (define_insn ""): Fix insn type. + + Back port from the trunk + 2013-06-26 Michael Meissner + Pat Haugen + Peter Bergner + + * config/rs6000/power8.md: New. + * config/rs6000/rs6000-cpus.def (RS6000_CPU table): Adjust processor + setting for power8 entry. + * config/rs6000/t-rs6000 (MD_INCLUDES): Add power8.md. + * config/rs6000/rs6000.c (is_microcoded_insn, is_cracked_insn): Adjust + test for Power4/Power5 only. + (insn_must_be_first_in_group, insn_must_be_last_in_group): Add Power8 + support. + (force_new_group): Adjust comment. + * config/rs6000/rs6000.md: Include power8.md. + + Back port from the trunk + 2013-06-14 Michael Meissner + + PR target/57615 + * config/rs6000/rs6000.md (mov_ppc64): Call + rs6000_output_move_128bit to handle emitting quad memory + operations. Set attribute length to 8 bytes. + + Back port from the trunk + 2013-06-13 Michael Meissner + + * config/rs6000/rs6000.c (rs6000_option_override_internal): Move + test for clearing quad memory on 32-bit later. + + Back port from the trunk + + 2013-06-12 Michael Meissner + Pat Haugen + Peter Bergner + + * config/rs6000/rs6000.c (emit_load_locked): Add support for + power8 byte, half-word, and quad-word atomic instructions. + (emit_store_conditional): Likewise. + (rs6000_expand_atomic_compare_and_swap): Likewise. + (rs6000_expand_atomic_op): Likewise. + + * config/rs6000/sync.md (larx): Add new modes for power8. + (stcx): Likewise. + (AINT): New mode iterator to include TImode as well as normal + integer modes on power8. 
+ (fetchop_pred): Use int_reg_operand instead of gpc_reg_operand so + that VSX registers are not considered. Use AINT mode iterator + instead of INT1 to allow inclusion of quad word atomic operations + on power8. + (load_locked): Likewise. + (store_conditional): Likewise. + (atomic_compare_and_swap): Likewise. + (atomic_exchange): Likewise. + (atomic_nand): Likewise. + (atomic_fetch_): Likewise. + (atomic_nand_fetch): Likewise. + (mem_thread_fence): Use gen_loadsync_ instead of enumerating + each type. + (ATOMIC): On power8, add QImode, HImode modes. + (load_locked_si): Variants of load_locked for QI/HI + modes that promote to SImode. + (load_lockedti): Convert TImode arguments to PTImode, so that we + get a guaranteed even/odd register pair. + (load_lockedpti): Likewise. + (store_conditionalti): Likewise. + (store_conditionalpti): Likewise. + + * config/rs6000/rs6000.md (QHI): New mode iterator for power8 + atomic load/store instructions. + (HSI): Likewise. + + Back port from the trunk + + 2013-06-10 Michael Meissner + Pat Haugen + Peter Bergner + + * config/rs6000/vector.md (GPR move splitter): Do not split moves + of vectors in GPRS if they are direct moves or quad word load or + store moves. + + * config/rs6000/rs6000-protos.h (rs6000_output_move_128bit): Add + declaration. + (direct_move_p): Likewise. + (quad_load_store_p): Likewise. + + * config/rs6000/rs6000.c (enum rs6000_reg_type): Simplify register + classes into bins based on the physical register type. + (reg_class_to_reg_type): Likewise. + (IS_STD_REG_TYPE): Likewise. + (IS_FP_VECT_REG_TYPE): Likewise. + (reload_fpr_gpr): Arrays to determine what insn to use if we can + use direct move instructions. + (reload_gpr_vsx): Likewise. + (reload_vsx_gpr): Likewise. + (rs6000_init_hard_regno_mode_ok): Precalculate the register type + information that is a simplification of register classes. Also + precalculate direct move reload helpers.
+ (direct_move_p): New function to return true if the operation can + be done as a direct move instruction. + (quad_load_store_p): New function to return true if the operation + is a quad memory operation. + (rs6000_legitimize_address): If quad memory, only allow register + indirect for TImode addresses. + (rs6000_legitimate_address_p): Likewise. + (enum reload_reg_type): Delete, replace with rs6000_reg_type. + (rs6000_reload_register_type): Likewise. + (register_to_reg_type): Return register type. + (rs6000_secondary_reload_simple_move): New helper function for + secondary reload and secondary memory needed to identify anything + that is a simple move, and does not need reloading. + (rs6000_secondary_reload_direct_move): New helper function for + secondary reload to identify cases that can be done with several + instructions via the direct move instructions. + (rs6000_secondary_reload_move): New helper function for secondary + reload to identify moves between register types that can be done. + (rs6000_secondary_reload): Add support for quad memory operations + and for direct move. + (rs6000_secondary_memory_needed): Likewise. + (rs6000_debug_secondary_memory_needed): Change argument names. + (rs6000_output_move_128bit): New function to return the move to + use for 128-bit moves, including knowing about the various + limitations of quad memory operations. + + * config/rs6000/vsx.md (vsx_mov): Add support for quad + memory operations. Call rs6000_output_move_128bit for the actual + instruction(s) to generate. + (vsx_movti_64bit): Likewise. + + * config/rs6000/rs6000.md (UNSPEC_P8V_FMRGOW): New unspec values. + (UNSPEC_P8V_MTVSRWZ): Likewise. + (UNSPEC_P8V_RELOAD_FROM_GPR): Likewise. + (UNSPEC_P8V_MTVSRD): Likewise. + (UNSPEC_P8V_XXPERMDI): Likewise. + (UNSPEC_P8V_RELOAD_FROM_VSX): Likewise. + (UNSPEC_FUSION_GPR): Likewise. + (FMOVE128_GPR): New iterator for direct move. + (f32_lv): New mode attribute for load/store of SFmode/SDmode + values. + (f32_sv): Likewise.
+ (f32_dm): Likewise. + (zero_extenddi2_internal1): Add support for power8 32-bit + loads and direct move instructions. + (zero_extendsidi2_lfiwzx): Likewise. + (extendsidi2_lfiwax): Likewise. + (extendsidi2_nocell): Likewise. + (floatsi2_lfiwax): Likewise. + (lfiwax): Likewise. + (floatunssi2_lfiwzx): Likewise. + (lfiwzx): Likewise. + (fix_trunc_stfiwx): Likewise. + (fixuns_trunc_stfiwx): Likewise. + (mov_hardfloat, 32-bit floating point): Likewise. + (mov_hardfloat64, 64-bit floating point): Likewise. + (parity2_cmpb): Set length/type attr. + (unnamed shift right patterns, mov_internal2): Change type attr + for 'mr.' to fast_compare. + (bpermd_): Change type attr to popcnt. + (p8_fmrgow_): New insns for power8 direct move support. + (p8_mtvsrwz_1): Likewise. + (p8_mtvsrwz_2): Likewise. + (reload_fpr_from_gpr): Likewise. + (p8_mtvsrd_1): Likewise. + (p8_mtvsrd_2): Likewise. + (p8_xxpermdi_): Likewise. + (reload_vsx_from_gpr): Likewise. + (reload_vsx_from_gprsf): Likewise. + (p8_mfvsrd_3_): Likewise. + (reload_gpr_from_vsx): Likewise. + (reload_gpr_from_vsxsf): Likewise. + (p8_mfvsrd_4_disf): Likewise. + (multi-word GPR splits): Do not split direct moves or quad memory + operations. + + Backport from the trunk + + 2013-06-06 Michael Meissner + Pat Haugen + Peter Bergner + + * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): + Document new power8 builtins. + + * config/rs6000/vector.md (and3): Add a clobber/scratch of a + condition code register, to allow 128-bit logical operations to be + done in the VSX or GPR registers. + (nor3): Use the canonical form for nor. + (eqv3): Add expanders for power8 xxleqv, xxlnand, xxlorc, + vclz*, and vpopcnt* vector instructions. + (nand3): Likewise. + (orc3): Likewise. + (clz2): Likewise. + (popcount2): Likewise. + + * config/rs6000/predicates.md (int_reg_operand): Rework tests so + that only the GPRs are recognized. + + * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add + support for new power8 builtins.
+ + * config/rs6000/rs6000-builtin.def (xscvspdpn): Add new power8 + builtin functions. + (xscvdpspn): Likewise. + (vclz): Likewise. + (vclzb): Likewise. + (vclzh): Likewise. + (vclzw): Likewise. + (vclzd): Likewise. + (vpopcnt): Likewise. + (vpopcntb): Likewise. + (vpopcnth): Likewise. + (vpopcntw): Likewise. + (vpopcntd): Likewise. + (vgbbd): Likewise. + (vmrgew): Likewise. + (vmrgow): Likewise. + (eqv): Likewise. + (eqv_v16qi3): Likewise. + (eqv_v8hi3): Likewise. + (eqv_v4si3): Likewise. + (eqv_v2di3): Likewise. + (eqv_v4sf3): Likewise. + (eqv_v2df3): Likewise. + (nand): Likewise. + (nand_v16qi3): Likewise. + (nand_v8hi3): Likewise. + (nand_v4si3): Likewise. + (nand_v2di3): Likewise. + (nand_v4sf3): Likewise. + (nand_v2df3): Likewise. + (orc): Likewise. + (orc_v16qi3): Likewise. + (orc_v8hi3): Likewise. + (orc_v4si3): Likewise. + (orc_v2di3): Likewise. + (orc_v4sf3): Likewise. + (orc_v2df3): Likewise. + + * config/rs6000/rs6000.c (rs6000_option_override_internal): Only + allow power8 quad mode in 64-bit. + (rs6000_builtin_vectorized_function): Add support to vectorize + ISA 2.07 count leading zeros, population count builtins. + (rs6000_expand_vector_init): On ISA 2.07 use xscvdpspn to form + V4SF vectors instead of xscvdpsp to avoid IEEE related traps. + (builtin_function_type): Add vgbbd builtin function which takes an + unsigned argument. + (altivec_expand_vec_perm_const): Add support for new power8 merge + instructions. + + * config/rs6000/vsx.md (VSX_L2): New iterator for 128-bit types, + that does not include TImode for use with 32-bit. + (UNSPEC_VSX_CVSPDPN): Support for power8 xscvdpspn and xscvspdpn + instructions. + (UNSPEC_VSX_CVDPSPN): Likewise. + (vsx_xscvdpspn): Likewise. + (vsx_xscvspdpn): Likewise. + (vsx_xscvdpspn_scalar): Likewise. + (vsx_xscvspdpn_directmove): Likewise. + (vsx_and3): Split logical operations into 32-bit and + 64-bit. Add support to do logical operations on TImode as well as + VSX vector types.
Allow logical operations to be done in either + VSX registers or in general purpose registers in 64-bit mode. Add + splitters if GPRs were used. For AND, add clobber of CCmode to + allow use of ANDI on GPRs. Rewrite nor to use the canonical RTL + encoding. + (vsx_and3_32bit): Likewise. + (vsx_and3_64bit): Likewise. + (vsx_ior3): Likewise. + (vsx_ior3_32bit): Likewise. + (vsx_ior3_64bit): Likewise. + (vsx_xor3): Likewise. + (vsx_xor3_32bit): Likewise. + (vsx_xor3_64bit): Likewise. + (vsx_one_cmpl2): Likewise. + (vsx_one_cmpl2_32bit): Likewise. + (vsx_one_cmpl2_64bit): Likewise. + (vsx_nor3): Likewise. + (vsx_nor3_32bit): Likewise. + (vsx_nor3_64bit): Likewise. + (vsx_andc3): Likewise. + (vsx_andc3_32bit): Likewise. + (vsx_andc3_64bit): Likewise. + (vsx_eqv3_32bit): Add support for power8 xxleqv, xxlnand, + and xxlorc instructions. + (vsx_eqv3_64bit): Likewise. + (vsx_nand3_32bit): Likewise. + (vsx_nand3_64bit): Likewise. + (vsx_orc3_32bit): Likewise. + (vsx_orc3_64bit): Likewise. + + * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Update comment. + + * config/rs6000/altivec.md (UNSPEC_VGBBD): Add power8 vgbbd + instruction. + (p8_vmrgew): Add power8 vmrgew and vmrgow instructions. + (p8_vmrgow): Likewise. + (altivec_and3): Add clobber of CCmode to allow AND using + GPRs to be split under VSX. + (p8v_clz2): Add power8 count leading zero support. + (p8v_popcount2): Add power8 population count support. + (p8v_vgbbd): Add power8 gather bits by bytes by doubleword + support. + + * config/rs6000/rs6000.md (eqv3): Add support for power8 eqv + instruction. + + * config/rs6000/altivec.h (vec_eqv): Add defines to export power8 + builtin functions. + (vec_nand): Likewise. + (vec_vclz): Likewise. + (vec_vclzb): Likewise. + (vec_vclzd): Likewise. + (vec_vclzh): Likewise. + (vec_vclzw): Likewise. + (vec_vgbbd): Likewise. + (vec_vmrgew): Likewise. + (vec_vmrgow): Likewise. + (vec_vpopcnt): Likewise. + (vec_vpopcntb): Likewise. + (vec_vpopcntd): Likewise. + (vec_vpopcnth): Likewise.
+ (vec_vpopcntw): Likewise. + + Backport from trunk + + 2013-05-29 Michael Meissner + Pat Haugen + Peter Bergner + + * config/rs6000/vector.md (VEC_I): Add support for new power8 V2DI + instructions. + (VEC_A): Likewise. + (VEC_C): Likewise. + (vrotl3): Likewise. + (vashl3): Likewise. + (vlshr3): Likewise. + (vashr3): Likewise. + + * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add + support for power8 V2DI builtins. + + * config/rs6000/rs6000-builtin.def (abs_v2di): Add support for + power8 V2DI builtins. + (vupkhsw): Likewise. + (vupklsw): Likewise. + (vaddudm): Likewise. + (vminsd): Likewise. + (vmaxsd): Likewise. + (vminud): Likewise. + (vmaxud): Likewise. + (vpkudum): Likewise. + (vpksdss): Likewise. + (vpkudus): Likewise. + (vpksdus): Likewise. + (vrld): Likewise. + (vsld): Likewise. + (vsrd): Likewise. + (vsrad): Likewise. + (vsubudm): Likewise. + (vcmpequd): Likewise. + (vcmpgtsd): Likewise. + (vcmpgtud): Likewise. + (vcmpequd_p): Likewise. + (vcmpgtsd_p): Likewise. + (vcmpgtud_p): Likewise. + (vupkhsw): Likewise. + (vupklsw): Likewise. + (vaddudm): Likewise. + (vmaxsd): Likewise. + (vmaxud): Likewise. + (vminsd): Likewise. + (vminud): Likewise. + (vpksdss): Likewise. + (vpksdus): Likewise. + (vpkudum): Likewise. + (vpkudus): Likewise. + (vrld): Likewise. + (vsld): Likewise. + (vsrad): Likewise. + (vsrd): Likewise. + (vsubudm): Likewise. + + * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Add + support for power8 V2DI instructions. + + * config/rs6000/altivec.md (UNSPEC_VPKUHUM): Add support for + power8 V2DI instructions. Combine pack and unpack insns to use an + iterator for each mode. Check whether a particular mode supports + Altivec instructions instead of just checking TARGET_ALTIVEC. + (UNSPEC_VPKUWUM): Likewise. + (UNSPEC_VPKSHSS): Likewise. + (UNSPEC_VPKSWSS): Likewise. + (UNSPEC_VPKUHUS): Likewise. + (UNSPEC_VPKSHUS): Likewise. + (UNSPEC_VPKUWUS): Likewise. + (UNSPEC_VPKSWUS): Likewise. 
+ (UNSPEC_VPACK_SIGN_SIGN_SAT): Likewise. + (UNSPEC_VPACK_SIGN_UNS_SAT): Likewise. + (UNSPEC_VPACK_UNS_UNS_SAT): Likewise. + (UNSPEC_VPACK_UNS_UNS_MOD): Likewise. + (UNSPEC_VUPKHSB): Likewise. + (UNSPEC_VUNPACK_HI_SIGN): Likewise. + (UNSPEC_VUNPACK_LO_SIGN): Likewise. + (UNSPEC_VUPKHSH): Likewise. + (UNSPEC_VUPKLSB): Likewise. + (UNSPEC_VUPKLSH): Likewise. + (VI2): Likewise. + (VI_char): Likewise. + (VI_scalar): Likewise. + (VI_unit): Likewise. + (VP): Likewise. + (VP_small): Likewise. + (VP_small_lc): Likewise. + (VU_char): Likewise. + (add3): Likewise. + (altivec_vaddcuw): Likewise. + (altivec_vaddus): Likewise. + (altivec_vaddss): Likewise. + (sub3): Likewise. + (altivec_vsubcuw): Likewise. + (altivec_vsubus): Likewise. + (altivec_vsubss): Likewise. + (altivec_vavgs): Likewise. + (altivec_vcmpbfp): Likewise. + (altivec_eq): Likewise. + (altivec_gt): Likewise. + (altivec_gtu): Likewise. + (umax3): Likewise. + (smax3): Likewise. + (umin3): Likewise. + (smin3): Likewise. + (altivec_vpkuhum): Likewise. + (altivec_vpkuwum): Likewise. + (altivec_vpkshss): Likewise. + (altivec_vpkswss): Likewise. + (altivec_vpkuhus): Likewise. + (altivec_vpkshus): Likewise. + (altivec_vpkuwus): Likewise. + (altivec_vpkswus): Likewise. + (altivec_vpksss): Likewise. + (altivec_vpksus): Likewise. + (altivec_vpkuus): Likewise. + (altivec_vpkuum): Likewise. + (altivec_vrl): Likewise. + (altivec_vsl): Likewise. + (altivec_vsr): Likewise. + (altivec_vsra): Likewise. + (altivec_vsldoi_): Likewise. + (altivec_vupkhsb): Likewise. + (altivec_vupkhs): Likewise. + (altivec_vupkls): Likewise. + (altivec_vupkhsh): Likewise. + (altivec_vupklsb): Likewise. + (altivec_vupklsh): Likewise. + (altivec_vcmpequ_p): Likewise. + (altivec_vcmpgts_p): Likewise. + (altivec_vcmpgtu_p): Likewise. + (abs2): Likewise. + (vec_unpacks_hi_v16qi): Likewise. + (vec_unpacks_hi_v8hi): Likewise. + (vec_unpacks_lo_v16qi): Likewise. + (vec_unpacks_hi_): Likewise. + (vec_unpacks_lo_v8hi): Likewise. 
+ (vec_unpacks_lo_): Likewise. + (vec_pack_trunc_v8hi): Likewise. + (vec_pack_trunc_v4si): Likewise. + (vec_pack_trunc_): Likewise. + + * config/rs6000/altivec.h (vec_vaddudm): Add defines for power8 + V2DI builtins. + (vec_vmaxsd): Likewise. + (vec_vmaxud): Likewise. + (vec_vminsd): Likewise. + (vec_vminud): Likewise. + (vec_vpksdss): Likewise. + (vec_vpksdus): Likewise. + (vec_vpkudum): Likewise. + (vec_vpkudus): Likewise. + (vec_vrld): Likewise. + (vec_vsld): Likewise. + (vec_vsrad): Likewise. + (vec_vsrd): Likewise. + (vec_vsubudm): Likewise. + (vec_vupkhsw): Likewise. + (vec_vupklsw): Likewise. + + 2013-05-22 Michael Meissner + Pat Haugen + Peter Bergner + + * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Add + documentation for the power8 crypto builtins. + + * config/rs6000/t-rs6000 (MD_INCLUDES): Add crypto.md. + + * config/rs6000/rs6000-builtin.def (BU_P8V_AV_1): Add support + macros for defining power8 builtin functions. + (BU_P8V_AV_2): Likewise. + (BU_P8V_AV_P): Likewise. + (BU_P8V_VSX_1): Likewise. + (BU_P8V_OVERLOAD_1): Likewise. + (BU_P8V_OVERLOAD_2): Likewise. + (BU_CRYPTO_1): Likewise. + (BU_CRYPTO_2): Likewise. + (BU_CRYPTO_3): Likewise. + (BU_CRYPTO_OVERLOAD_1): Likewise. + (BU_CRYPTO_OVERLOAD_2): Likewise. + (XSCVSPDP): Fix typo, point to the correct instruction. + (VCIPHER): Add power8 crypto builtins. + (VCIPHERLAST): Likewise. + (VNCIPHER): Likewise. + (VNCIPHERLAST): Likewise. + (VPMSUMB): Likewise. + (VPMSUMH): Likewise. + (VPMSUMW): Likewise. + (VPERMXOR_V2DI): Likewise. + (VPERMXOR_V4SI): Likewise. + (VPERMXOR_V8HI): Likewise. + (VPERMXOR_V16QI): Likewise. + (VSHASIGMAW): Likewise. + (VSHASIGMAD): Likewise. + (VPMSUM): Likewise. + (VPERMXOR): Likewise. + (VSHASIGMA): Likewise. + + * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define + __CRYPTO__ if the crypto instructions are available. + (altivec_overloaded_builtins): Add support for overloaded power8 + builtins.
+ + * config/rs6000/rs6000.c (rs6000_expand_ternop_builtin): Add + support for power8 crypto builtins. + (builtin_function_type): Likewise. + (altivec_init_builtins): Add support for builtins that take vector + long long (V2DI) arguments. + + * config/rs6000/crypto.md: New file, define power8 crypto + instructions. + + 2013-05-22 Michael Meissner + Pat Haugen + Peter Bergner + + * doc/invoke.texi (Option Summary): Add power8 options. + (RS/6000 and PowerPC Options): Likewise. + + * doc/md.texi (PowerPC and IBM RS6000 constraints): Update to use + constraints.md instead of rs6000.h. Reorder w* constraints. Add + wm, wn, wr documentation. + + * gcc/config/rs6000/constraints.md (wm): New constraint for VSX + registers if direct move instructions are enabled. + (wn): New constraint for no registers. + (wq): New constraint for quad word even GPR registers. + (wr): New constraint if 64-bit instructions are enabled. + (wv): New constraint if power8 vector instructions are enabled. + (wQ): New constraint for quad word memory locations. + + * gcc/config/rs6000/predicates.md (const_0_to_15_operand): New + constraint for 0..15 for crypto instructions. + (gpc_reg_operand): If VSX allow registers in VSX registers as well + as GPR and floating point registers. + (int_reg_operand): New predicate to match only GPR registers. + (base_reg_operand): New predicate to match base registers. + (quad_int_reg_operand): New predicate to match even GPR registers + for quad memory operations. + (vsx_reg_or_cint_operand): New predicate to allow vector logical + operations in both GPR and VSX registers. + (quad_memory_operand): New predicate for quad memory operations. + (reg_or_indexed_operand): New predicate for direct move support. + + * gcc/config/rs6000/rs6000-cpus.def (ISA_2_5_MASKS_EMBEDDED): + Inherit from ISA_2_4_MASKS, not ISA_2_2_MASKS. + (ISA_2_7_MASKS_SERVER): New mask for ISA 2.07 (i.e. power8). + (POWERPC_MASKS): Add power8 options. 
+ (power8 cpu): Use ISA_2_7_MASKS_SERVER instead of specifying the + various options. + + * gcc/config/rs6000/rs6000-c.c (rs6000_target_modify_macros): + Define _ARCH_PWR8 and __POWER8_VECTOR__ for power8. + + * gcc/config/rs6000/rs6000.opt (-mvsx-timode): Add documentation. + (-mpower8-fusion): New power8 options. + (-mpower8-fusion-sign): Likewise. + (-mpower8-vector): Likewise. + (-mcrypto): Likewise. + (-mdirect-move): Likewise. + (-mquad-memory): Likewise. + + * gcc/config/rs6000/rs6000.c (power8_cost): Initial definition for + power8. + (rs6000_hard_regno_mode_ok): Make PTImode only match even GPR + registers. + (rs6000_debug_reg_print): Print the base register class if + -mdebug=reg. + (rs6000_debug_vector_unit): Add p8_vector. + (rs6000_debug_reg_global): If -mdebug=reg, print power8 constraint + definitions. Also print fusion state. + (rs6000_init_hard_regno_mode_ok): Set up power8 constraints. + (rs6000_builtin_mask_calculate): Add power8 builtin support. + (rs6000_option_override_internal): Add support for power8. + (rs6000_common_init_builtins): Add debugging for skipped builtins + if -mdebug=builtin. + (rs6000_adjust_cost): Add power8 support. + (rs6000_issue_rate): Likewise. + (insn_must_be_first_in_group): Likewise. + (insn_must_be_last_in_group): Likewise. + (force_new_group): Likewise. + (rs6000_register_move_cost): Likewise. + (rs6000_opt_masks): Likewise. + + * config/rs6000/rs6000.h (ASM_CPU_POWER8_SPEC): If we don't have a + power8 capable assembler, default to power7 options. + (TARGET_DIRECT_MOVE): Likewise. + (TARGET_CRYPTO): Likewise. + (TARGET_P8_VECTOR): Likewise. + (VECTOR_UNIT_P8_VECTOR_P): Define power8 vector support. + (VECTOR_UNIT_VSX_OR_P8_VECTOR_P): Likewise. + (VECTOR_MEM_P8_VECTOR_P): Likewise. + (VECTOR_MEM_VSX_OR_P8_VECTOR_P): Likewise. + (VECTOR_MEM_ALTIVEC_OR_VSX_P): Likewise. + (TARGET_XSCVDPSPN): Likewise. + (TARGET_XSCVSPDPN): Likewise. + (TARGET_SYNC_HI_QI): Likewise. + (TARGET_SYNC_TI): Likewise.
+ (MASK_CRYPTO): Likewise. + (MASK_DIRECT_MOVE): Likewise. + (MASK_P8_FUSION): Likewise. + (MASK_P8_VECTOR): Likewise. + (REG_ALLOC_ORDER): Move fr13 to be lower in priority so that the + TFmode temporary used by some of the direct move instructions to + get two FP temporary registers does not force creation of a stack + frame. + (VLOGICAL_REGNO_P): Allow vector logical operations in GPRs. + (MODES_TIEABLE_P): Move the VSX tests above the Altivec tests so + that any VSX registers are tieable, even if they are also an + Altivec vector mode. + (r6000_reg_class_enum): Add wm, wr, wv constraints. + (RS6000_BTM_P8_VECTOR): Power8 builtin support. + (RS6000_BTM_CRYPTO): Likewise. + (RS6000_BTM_COMMON): Likewise. + + * config/rs6000/rs6000.md (cpu attribute): Add power8. + * config/rs6000/rs6000-opts.h (PROCESSOR_POWER8): Likewise. + (enum rs6000_vector): Add power8 vector support. + + + Backport from mainline + 2013-03-20 Pat Haugen + + * config/rs6000/predicates.md (indexed_address, update_address_mem + update_indexed_address_mem): New predicates. + * config/rs6000/vsx.md (vsx_extract__zero): Set correct "type" + attribute for load/store instructions. + * config/rs6000/dfp.md (movsd_store): Likewise. + (movsd_load): Likewise. + * config/rs6000/rs6000.md (zero_extenddi2_internal1): Likewise. + (unnamed HI->DI extend define_insn): Likewise. + (unnamed SI->DI extend define_insn): Likewise. + (unnamed QI->SI extend define_insn): Likewise. + (unnamed QI->HI extend define_insn): Likewise. + (unnamed HI->SI extend define_insn): Likewise. + (unnamed HI->SI extend define_insn): Likewise. + (extendsfdf2_fpr): Likewise. + (movsi_internal1): Likewise. + (movsi_internal1_single): Likewise. + (movhi_internal): Likewise. + (movqi_internal): Likewise. + (movcc_internal1): Correct mnemonic for stw insn. Set correct "type" + attribute for load/store instructions. + (mov_hardfloat): Set correct "type" attribute for load/store + instructions. + (mov_softfloat): Likewise. 
+ (mov_hardfloat32): Likewise. + (mov_hardfloat64): Likewise. + (mov_softfloat64): Likewise. + (movdi_internal32): Likewise. + (movdi_internal64): Likewise. + (probe_stack_): Likewise. + + Backport from mainline + 2013-03-20 Michael Meissner + + * config/rs6000/vector.md (VEC_R): Add 32-bit integer, binary + floating point, and decimal floating point to reload iterator. + + * config/rs6000/constraints.md (wl constraint): New constraints to + return FLOAT_REGS if certain options are used to reduce the number + of separate patterns that exist in the file. + (wx constraint): Likewise. + (wz constraint): Likewise. + + * config/rs6000/rs6000.c (rs6000_debug_reg_global): If + -mdebug=reg, print wg, wl, wx, and wz constraints. + (rs6000_init_hard_regno_mode_ok): Initialize new constraints. + Initialize the reload functions for 64-bit binary/decimal floating + point types. + (reg_offset_addressing_ok_p): If we are on a power7 or later, use + LFIWZX and STFIWX to load/store 32-bit decimal types, and don't + create the buffer on the stack to overcome not having a 32-bit + load and store. + (rs6000_emit_move): Likewise. + (rs6000_secondary_memory_needed_rtx): Likewise. + (rs6000_alloc_sdmode_stack_slot): Likewise. + (rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f + via xxlxor, just like DFmode 0.0. + + * config/rs6000/rs6000.h (TARGET_NO_SDMODE_STACK): New macro) + (define as 1 if we are running on a power7 or newer. + (enum r6000_reg_class_enum): Add new constraints. + + * config/rs6000/dfp.md (movsd): Delete, combine with binary + floating point moves in rs6000.md. Combine power6x (mfpgpr) moves + with other moves by using conditional constraints (wg). Use LFIWZX + and STFIWX for loading SDmode on power7. Use xxlxor to create + 0.0f. + (movsd splitter): Likewise. + (movsd_hardfloat): Likewise. + (movsd_softfloat): Likewise. + + * config/rs6000/rs6000.md (FMOVE32): New iterators to combine + binary and decimal floating point moves. 
+ (fmove_ok): New attributes to combine binary and decimal floating + point moves, and to combine power6x (mfpgpr) moves along normal + floating moves. + (real_value_to_target): Likewise. + (f32_lr): Likewise. + (f32_lm): Likewise. + (f32_li): Likewise. + (f32_sr): Likewise. + (f32_sm): Likewise. + (f32_si): Likewise. + (movsf): Combine binary and decimal floating point moves. Combine + power6x (mfpgpr) moves with other moves by using conditional + constraints (wg). Use LFIWZX and STFIWX for loading SDmode on + power7. + (mov for SFmode/SDmode); Likewise. + (SFmode/SDmode splitters): Likewise. + (movsf_hardfloat): Likewise. + (mov_hardfloat for SFmode/SDmode): Likewise. + (movsf_softfloat): Likewise. + (mov_softfloat for SFmode/SDmode): Likewise. + + * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wl) + (wx and wz constraints. + + * config/rs6000/constraints.md (wg constraint): New constraint to + return FLOAT_REGS if -mmfpgpr (power6x) was used. + + * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add wg + constraint. + + * config/rs6000/rs6000.c (rs6000_debug_reg_global): If + -mdebug=reg, print wg, wl, wx, and wz constraints. + (rs6000_init_hard_regno_mode_ok): Initialize new constraints. + Initialize the reload functions for 64-bit binary/decimal floating + point types. + (reg_offset_addressing_ok_p): If we are on a power7 or later, use + LFIWZX and STFIWX to load/store 32-bit decimal types, and don't + create the buffer on the stack to overcome not having a 32-bit + load and store. + (rs6000_emit_move): Likewise. + (rs6000_secondary_memory_needed_rtx): Likewise. + (rs6000_alloc_sdmode_stack_slot): Likewise. + (rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f + via xxlxor, just like DFmode 0.0. + + + * config/rs6000/dfp.md (movdd): Delete, combine with binary + floating point moves in rs6000.md. Combine power6x (mfpgpr) moves + with other moves by using conditional constraints (wg). 
Use LFIWZX + and STFIWX for loading SDmode on power7. + (movdd splitters): Likewise. + (movdd_hardfloat32): Likewise. + (movdd_softfloat32): Likewise. + (movdd_hardfloat64_mfpgpr): Likewise. + (movdd_hardfloat64): Likewise. + (movdd_softfloat64): Likewise. + + * config/rs6000/rs6000.md (FMOVE64): New iterators to combine + 64-bit binary and decimal floating point moves. + (FMOVE64X): Likewise. + (movdf): Combine 64-bit binary and decimal floating point moves. + Combine power6x (mfpgpr) moves with other moves by using + conditional constraints (wg). + (mov for DFmode/DDmode): Likewise. + (DFmode/DDmode splitters): Likewise. + (movdf_hardfloat32): Likewise. + (mov_hardfloat32 for DFmode/DDmode): Likewise. + (movdf_softfloat32): Likewise. + (movdf_hardfloat64_mfpgpr): Likewise. + (movdf_hardfloat64): Likewise. + (mov_hardfloat64 for DFmode/DDmode): Likewise. + (movdf_softfloat64): Likewise. + (mov_softfloat64 for DFmode/DDmode): Likewise. + (reload__load): Move to later in the file so they aren't in + the middle of the floating point move insns. + (reload__store): Likewise. + + * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wg + constraint. + + * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print out wg + constraint if -mdebug=reg. + (rs6000_init_hard_regno_mode_ok): Enable wg constraint if + -mfpgpr. Enable using dd reload support if needed. + + * config/rs6000/dfp.md (movtd): Delete, combine with 128-bit + binary and decimal floating point moves in rs6000.md. + (movtd_internal): Likewise. + + * config/rs6000/rs6000.md (FMOVE128): Combine 128-bit binary and + decimal floating point moves. + (movtf): Likewise. + (movtf_internal): Likewise. + (mov_internal, TDmode/TFmode): Likewise. + (movtf_softfloat): Likewise. + (mov_softfloat, TDmode/TFmode): Likewise. + + * config/rs6000/rs6000.md (movdi_mfpgpr): Delete, combine with + movdi_internal64, using wg constraint for move direct operations. + (movdi_internal64): Likewise. 
+ + * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print + MODES_TIEABLE_P for selected modes. Print the numerical value of + the various virtual registers. Use GPR/FPR first/last values) + (instead of hard coding the register numbers. Print which modes + have reload functions registered. + (rs6000_option_override_internal): If -mdebug=reg, trace the + options settings before/after setting cpu, target and subtarget + settings. + (rs6000_secondary_reload_trace): Improve the RTL dump for + -mdebug=addr and for secondary reload failures in + rs6000_secondary_reload_inner. + (rs6000_secondary_reload_fail): Likewise. + (rs6000_secondary_reload_inner): Likewise. + + * config/rs6000/rs6000.md (FIRST_GPR_REGNO): Add convenience + macros for first/last GPR and FPR registers. + (LAST_GPR_REGNO): Likewise. + (FIRST_FPR_REGNO): Likewise. + (LAST_FPR_REGNO): Likewise. + + * config/rs6000/vector.md (mul3): Use the combined macro + VECTOR_UNIT_ALTIVEC_OR_VSX_P instead of separate calls to + VECTOR_UNIT_ALTIVEC_P and VECTOR_UNIT_VSX_P. + (vcond): Likewise. + (vcondu): Likewise. + (vector_gtu): Likewise. + (vector_gte): Likewise. + (xor3): Don't allow logical operations on TImode in 32-bit + to prevent the compiler from converting DImode operations to + TImode. + (ior3): Likewise. + (and3): Likewise. + (one_cmpl2): Likewise. + (nor3): Likewise. + (andc3): Likewise. + + * config/rs6000/constraints.md (wt constraint): New constraint + that returns VSX_REGS if TImode is allowed in VSX registers. + + * config/rs6000/predicates.md (easy_fp_constant): 0.0f is an easy + constant under VSX. + + * config/rs6000/rs6000-modes.def (PTImode): Define, PTImode is + similar to TImode, but it is restricted to being in the GPRs. + + * config/rs6000/rs6000.opt (-mvsx-timode): New switch to allow + TImode to occupy a single VSX register. + + * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Default to + -mvsx-timode for power7/power8. + (power7 cpu): Likewise. + (power8 cpu): Likewise. 
+ + * config/rs6000/rs6000.c (rs6000_hard_regno_nregs_internal): Make + sure that TFmode/TDmode take up two registers if they are ever + allowed in the upper VSX registers. + (rs6000_hard_regno_mode_ok): If -mvsx-timode, allow TImode in VSX + registers. + (rs6000_init_hard_regno_mode_ok): Likewise. + (rs6000_debug_reg_global): Add debugging for PTImode and wt + constraint. Print if LRA is turned on. + (rs6000_option_override_internal): Give an error if -mvsx-timode + and VSX is not enabled. + (invalid_e500_subreg): Handle PTImode, restricting it to GPRs. If + -mvsx-timode, restrict TImode to reg+reg addressing, and PTImode + to reg+offset addressing. Use PTImode when checking offset + addresses for validity. + (reg_offset_addressing_ok_p): Likewise. + (rs6000_legitimate_offset_address_p): Likewise. + (rs6000_legitimize_address): Likewise. + (rs6000_legitimize_reload_address): Likewise. + (rs6000_legitimate_address_p): Likewise. + (rs6000_eliminate_indexed_memrefs): Likewise. + (rs6000_emit_move): Likewise. + (rs6000_secondary_reload): Likewise. + (rs6000_secondary_reload_inner): Handle PTImode. Allow 64-bit + reloads to fpr registers to continue to use reg+offset addressing) + (but 64-bit reloads to altivec registers need reg+reg addressing. + Drop test for PRE_MODIFY, since VSX loads/stores no longer support + it. Treat LO_SUM like a PLUS operation. + (rs6000_secondary_reload_class): If type is 64-bit, prefer to use + FLOAT_REGS instead of VSX_RGS to allow use of reg+offset + addressing. + (rs6000_cannot_change_mode_class): Do not allow TImode in VSX + registers to share a register with a smaller sized type, since VSX + puts scalars in the upper 64-bits. + (print_operand): Add support for PTImode. + (rs6000_register_move_cost): Use VECTOR_MEM_VSX_P instead of + VECTOR_UNIT_VSX_P to catch types that can be loaded in VSX + registers, but don't have arithmetic support. + (rs6000_memory_move_cost): Add test for VSX. + (rs6000_opt_masks): Add -mvsx-timode. 
+ + * config/rs6000/vsx.md (VSm): Change to use 64-bit aligned moves + for TImode. + (VSs): Likewise. + (VSr): Use wt constraint for TImode. + (VSv): Drop TImode support. + (vsx_movti): Delete, replace with versions for 32-bit and 64-bit. + (vsx_movti_64bit): Likewise. + (vsx_movti_32bit): Likewise. + (vec_store_): Use VSX iterator instead of vector iterator. + (vsx_and3): Delete use of '?' constraint on inputs, just put + one '?' on the appropriate output constraint. Do not allow TImode + logical operations on 32-bit systems. + (vsx_ior3): Likewise. + (vsx_xor3): Likewise. + (vsx_one_cmpl2): Likewise. + (vsx_nor3): Likewise. + (vsx_andc3): Likewise. + (vsx_concat_): Likewise. + (vsx_xxpermdi_): Fix thinko for non V2DF/V2DI modes. + + * config/rs6000/rs6000.h (MASK_VSX_TIMODE): Map from + OPTION_MASK_VSX_TIMODE. + (enum rs6000_reg_class_enum): Add RS6000_CONSTRAINT_wt. + (STACK_SAVEAREA_MODE): Use PTImode instead of TImode. + + * config/rs6000/rs6000.md (INT mode attribute): Add PTImode. + (TI2 iterator): New iterator for TImode, PTImode. + (wd mode attribute): Add values for vector types. + (movti_string): Replace TI move operations with operations for + TImode and PTImode. Add support for TImode being allowed in VSX + registers. + (mov_string, TImode/PTImode): Likewise. + (movti_ppc64): Likewise. + (mov_ppc64, TImode/PTImode): Likewise. + (TI mode splitters): Likewise. + + * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wt + constraint. + 2014-04-04 Richard Biener * tree-ssanames.c (make_ssa_name_fn): Fix assert. diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index fd6d07f50ff7..4b91c5c5e242 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -321,6 +321,42 @@ #define vec_vsx_st __builtin_vec_vsx_st #endif +#ifdef _ARCH_PWR8 +/* Vector additions added in ISA 2.07. 
*/ +#define vec_eqv __builtin_vec_eqv +#define vec_nand __builtin_vec_nand +#define vec_orc __builtin_vec_orc +#define vec_vaddudm __builtin_vec_vaddudm +#define vec_vclz __builtin_vec_vclz +#define vec_vclzb __builtin_vec_vclzb +#define vec_vclzd __builtin_vec_vclzd +#define vec_vclzh __builtin_vec_vclzh +#define vec_vclzw __builtin_vec_vclzw +#define vec_vgbbd __builtin_vec_vgbbd +#define vec_vmaxsd __builtin_vec_vmaxsd +#define vec_vmaxud __builtin_vec_vmaxud +#define vec_vminsd __builtin_vec_vminsd +#define vec_vminud __builtin_vec_vminud +#define vec_vmrgew __builtin_vec_vmrgew +#define vec_vmrgow __builtin_vec_vmrgow +#define vec_vpksdss __builtin_vec_vpksdss +#define vec_vpksdus __builtin_vec_vpksdus +#define vec_vpkudum __builtin_vec_vpkudum +#define vec_vpkudus __builtin_vec_vpkudus +#define vec_vpopcnt __builtin_vec_vpopcnt +#define vec_vpopcntb __builtin_vec_vpopcntb +#define vec_vpopcntd __builtin_vec_vpopcntd +#define vec_vpopcnth __builtin_vec_vpopcnth +#define vec_vpopcntw __builtin_vec_vpopcntw +#define vec_vrld __builtin_vec_vrld +#define vec_vsld __builtin_vec_vsld +#define vec_vsrad __builtin_vec_vsrad +#define vec_vsrd __builtin_vec_vsrd +#define vec_vsubudm __builtin_vec_vsubudm +#define vec_vupkhsw __builtin_vec_vupkhsw +#define vec_vupklsw __builtin_vec_vupklsw +#endif + /* Predicates. For C++, we use templates in order to allow non-parenthesized arguments. 
For C, instead, we use macros since non-parenthesized arguments were diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 1b0b5c3fb132..db2e93a168a5 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -41,15 +41,11 @@ UNSPEC_VMULOSB UNSPEC_VMULOUH UNSPEC_VMULOSH - UNSPEC_VPKUHUM - UNSPEC_VPKUWUM UNSPEC_VPKPX - UNSPEC_VPKSHSS - UNSPEC_VPKSWSS - UNSPEC_VPKUHUS - UNSPEC_VPKSHUS - UNSPEC_VPKUWUS - UNSPEC_VPKSWUS + UNSPEC_VPACK_SIGN_SIGN_SAT + UNSPEC_VPACK_SIGN_UNS_SAT + UNSPEC_VPACK_UNS_UNS_SAT + UNSPEC_VPACK_UNS_UNS_MOD UNSPEC_VSLV4SI UNSPEC_VSLO UNSPEC_VSR @@ -71,12 +67,10 @@ UNSPEC_VLOGEFP UNSPEC_VEXPTEFP UNSPEC_VLSDOI - UNSPEC_VUPKHSB + UNSPEC_VUNPACK_HI_SIGN + UNSPEC_VUNPACK_LO_SIGN UNSPEC_VUPKHPX - UNSPEC_VUPKHSH - UNSPEC_VUPKLSB UNSPEC_VUPKLPX - UNSPEC_VUPKLSH UNSPEC_DST UNSPEC_DSTT UNSPEC_DSTST @@ -134,6 +128,7 @@ UNSPEC_VUPKLS_V4SF UNSPEC_VUPKHU_V4SF UNSPEC_VUPKLU_V4SF + UNSPEC_VGBBD ]) (define_c_enum "unspecv" @@ -146,6 +141,8 @@ ;; Vec int modes (define_mode_iterator VI [V4SI V8HI V16QI]) +;; Like VI, but add ISA 2.07 integer vector ops +(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) ;; Short vec in modes (define_mode_iterator VIshort [V8HI V16QI]) ;; Vec float modes @@ -159,8 +156,18 @@ ;; Like VM, except don't do TImode (define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI]) -(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")]) -(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) +(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") + (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") + (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")]) + +;; Vector pack/unpack +(define_mode_iterator VP [V2DI V4SI V8HI]) +(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI 
"V16QI")]) +(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) +(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) ;; Vector move instructions. (define_insn "*altivec_mov" @@ -378,10 +385,10 @@ ;; add (define_insn "add3" - [(set (match_operand:VI 0 "register_operand" "=v") - (plus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (plus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vaddum %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -398,17 +405,17 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VADDCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vaddcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "altivec_vaddus" [(set (match_operand:VI 0 "register_operand" "=v") (unspec:VI [(match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")] + (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDU)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "" "vaddus %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -418,16 +425,16 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDS)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (mode)" "vaddss %0,%1,%2" [(set_attr "type" "vecsimple")]) ;; sub (define_insn "sub3" - [(set (match_operand:VI 0 "register_operand" "=v") - (minus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (minus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vsubum %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -444,7 +451,7 @@ (unspec:V4SI 
[(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUBCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vsubcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -454,7 +461,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBU)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (mode)" "vsubus %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -464,7 +471,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBS)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (mode)" "vsubss %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -483,7 +490,7 @@ (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] UNSPEC_VAVGS))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (mode)" "vavgs %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -492,31 +499,31 @@ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v")] UNSPEC_VCMPBFP))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vcmpbfp %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_eq" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (eq:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "" "vcmpequ %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_gt" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gt:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gt:VI2 (match_operand:VI2 1 
"altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "" "vcmpgts %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_gtu" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "" "vcmpgtu %0,%1,%2" [(set_attr "type" "veccmp")]) @@ -744,18 +751,18 @@ ;; max (define_insn "umax3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vmaxu %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smax3" - [(set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vmaxs %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -768,18 +775,18 @@ [(set_attr "type" "veccmp")]) (define_insn "umin3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vminu %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smin3" - [(set (match_operand:VI 0 "register_operand" 
"=v") - (smin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vmins %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -935,6 +942,31 @@ "vmrglw %0,%1,%2" [(set_attr "type" "vecperm")]) +;; Power8 vector merge even/odd +(define_insn "p8_vmrgew" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_P8_VECTOR" + "vmrgew %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_vmrgow" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "TARGET_P8_VECTOR" + "vmrgow %0,%1,%2" + [(set_attr "type" "vecperm")]) + (define_insn "vec_widen_umult_even_v16qi" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") @@ -1008,74 +1040,7 @@ [(set_attr "type" "veccomplex")]) -;; logical ops. 
Have the logical ops follow the memory ops in -;; terms of whether to prefer VSX or Altivec - -(define_insn "*altivec_and3" - [(set (match_operand:VM 0 "register_operand" "=v") - (and:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vand %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_ior3" - [(set (match_operand:VM 0 "register_operand" "=v") - (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_xor3" - [(set (match_operand:VM 0 "register_operand" "=v") - (xor:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vxor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_one_cmpl2" - [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (match_operand:VM 1 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vnor %0,%1,%1" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_nor3" - [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v"))))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vnor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_andc3" - [(set (match_operand:VM 0 "register_operand" "=v") - (and:VM (not:VM (match_operand:VM 2 "register_operand" "v")) - (match_operand:VM 1 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vandc %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "altivec_vpkuhum" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - "vpkuhum %0,%1,%2" - [(set_attr "type" 
"vecperm")]) - -(define_insn "altivec_vpkuwum" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - "vpkuwum %0,%1,%2" - [(set_attr "type" "vecperm")]) - +;; Vector pack/unpack (define_insn "altivec_vpkpx" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") @@ -1085,71 +1050,47 @@ "vpkpx %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshss" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshss %0,%1,%2" +(define_insn "altivec_vpksss" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_SIGN_SAT))] + "" + "vpksss %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkswss" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswss %0,%1,%2" +(define_insn "altivec_vpksus" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_UNS_SAT))] + "" + "vpksus %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkuhus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 
0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuhus %0,%1,%2" +(define_insn "altivec_vpkuus" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_SAT))] + "" + "vpkuus %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshus %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkuwus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuwus %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkswus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswus %0,%1,%2" +(define_insn "altivec_vpkuum" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "" + "vpkuum %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "*altivec_vrl" - [(set (match_operand:VI 0 "register_operand" "=v") - (rotate:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (rotate:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 
"register_operand" "v")))] + "" "vrl %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1172,26 +1113,26 @@ [(set_attr "type" "vecperm")]) (define_insn "*altivec_vsl" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashift:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashift:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vsl %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsr" - [(set (match_operand:VI 0 "register_operand" "=v") - (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vsr %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsra" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashiftrt:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vsra %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1476,12 +1417,20 @@ "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - "vupkhsb %0,%1" +(define_insn "altivec_vupkhs" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand: 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN))] + "" + "vupkhs %0,%1" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vupkls" + [(set (match_operand:VP 0 "register_operand" 
"=v") + (unspec:VP [(match_operand: 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "" + "vupkls %0,%1" [(set_attr "type" "vecperm")]) (define_insn "altivec_vupkhpx" @@ -1492,22 +1441,6 @@ "vupkhpx %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - "vupkhsh %0,%1" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vupklsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - "vupklsb %0,%1" - [(set_attr "type" "vecperm")]) - (define_insn "altivec_vupklpx" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] @@ -1516,49 +1449,41 @@ "vupklpx %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupklsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - "vupklsh %0,%1" - [(set_attr "type" "vecperm")]) - ;; Compare vectors producing a vector result and a predicate, setting CR6 to ;; indicate a combined status (define_insn "*altivec_vcmpequ_p" [(set (reg:CC 74) - (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (eq:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (eq:VI2 (match_dup 1) + (match_dup 2)))] + "" "vcmpequ. 
%0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgts_p" [(set (reg:CC 74) - (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gt:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (gt:VI2 (match_dup 1) + (match_dup 2)))] + "" "vcmpgts. %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgtu_p" [(set (reg:CC 74) - (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gtu:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (gtu:VI2 (match_dup 1) + (match_dup 2)))] + "" "vcmpgtu. %0,%1,%2" [(set_attr "type" "veccmp")]) @@ -1779,20 +1704,28 @@ [(set_attr "type" "vecstore")]) ;; Generate -;; vspltis? SCRATCH0,0 +;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0 ;; vsubu?m SCRATCH2,SCRATCH1,%1 ;; vmaxs? 
%0,%1,SCRATCH2" (define_expand "abs2" - [(set (match_dup 2) (vec_duplicate:VI (const_int 0))) - (set (match_dup 3) - (minus:VI (match_dup 2) - (match_operand:VI 1 "register_operand" "v"))) - (set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_dup 1) (match_dup 3)))] - "TARGET_ALTIVEC" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) + (minus:VI2 (match_dup 2) + (match_operand:VI2 1 "register_operand" "v"))) + (set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_dup 1) (match_dup 4)))] + "" { - operands[2] = gen_reg_rtx (GET_MODE (operands[0])); - operands[3] = gen_reg_rtx (GET_MODE (operands[0])); + int i, n_elt = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (n_elt); + + /* Create an all 0 constant. */ + for (i = 0; i < n_elt; ++i) + RTVEC_ELT (v, i) = const0_rtx; + + operands[2] = gen_reg_rtx (mode); + operands[3] = gen_rtx_CONST_VECTOR (mode, v); + operands[4] = gen_reg_rtx (mode); }) ;; Generate @@ -1950,49 +1883,19 @@ DONE; }") -(define_expand "vec_unpacks_hi_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsb (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_hi_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsh (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_lo_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsb (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_hi_" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand: 1 "register_operand" "v")] + 
UNSPEC_VUNPACK_HI_SIGN))] + "" + "") -(define_expand "vec_unpacks_lo_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsh (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_lo_" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand: 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "" + "") (define_insn "vperm_v8hiv4si" [(set (match_operand:V4SI 0 "register_operand" "=v") @@ -2291,29 +2194,13 @@ DONE; }") -(define_expand "vec_pack_trunc_v8hi" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_pack_trunc_v4si" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2])); - DONE; -}") +(define_expand "vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "" + "") (define_expand "altivec_negv4sf2" [(use (match_operand:V4SF 0 "register_operand" "")) @@ -2460,3 +2347,34 @@ emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx)); DONE; }") + + +;; Power8 vector instructions encoded as Altivec instructions + +;; Vector count leading zeros +(define_insn "*p8v_clz2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vclz %0,%1" + [(set_attr 
"length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector population count +(define_insn "*p8v_popcount2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vpopcnt %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector Gather Bits by Bytes by Doubleword +(define_insn "p8v_vgbbd" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VGBBD))] + "TARGET_P8_VECTOR" + "vgbbd %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index ccb61edce45b..4467b9e3d8bf 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -52,21 +52,61 @@ "@internal") ;; Use w as a prefix to add VSX modes -;; vector double (V2DF) +;; any VSX register +(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]" + "Any VSX register if the -mvsx option was used or NO_REGS.") + (define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]" - "@internal") + "VSX vector register to hold vector double data or NO_REGS.") -;; vector float (V4SF) (define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]" - "@internal") + "VSX vector register to hold vector float data or NO_REGS.") + +(define_register_constraint "wg" "rs6000_constraints[RS6000_CONSTRAINT_wg]" + "If -mmfpgpr was used, a floating point register or NO_REGS.") + +(define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]" + "Floating point register if the LFIWAX instruction is enabled or NO_REGS.") + +(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]" + "VSX register if direct move instructions are enabled, or NO_REGS.") + +;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use +;; direct move directly, and movsf 
can't to move between the register sets. +;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode +(define_register_constraint "wn" "NO_REGS" "No register (NO_REGS).") + +(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]" + "General purpose register if 64-bit instructions are enabled or NO_REGS.") -;; scalar double (DF) (define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]" - "@internal") + "VSX vector register to hold scalar double values or NO_REGS.") -;; any VSX register -(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]" - "@internal") +(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]" + "VSX vector register to hold 128 bit integer or NO_REGS.") + +(define_register_constraint "wu" "rs6000_constraints[RS6000_CONSTRAINT_wu]" + "Altivec register to use for float/32-bit int loads/stores or NO_REGS.") + +(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]" + "Altivec register to use for double loads/stores or NO_REGS.") + +(define_register_constraint "ww" "rs6000_constraints[RS6000_CONSTRAINT_ww]" + "FP or VSX register to perform float operations under -mvsx or NO_REGS.") + +(define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]" + "Floating point register if the STFIWX instruction is enabled or NO_REGS.") + +(define_register_constraint "wy" "rs6000_constraints[RS6000_CONSTRAINT_wy]" + "VSX vector register to hold scalar float values or NO_REGS.") + +(define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]" + "Floating point register if the LFIWZX instruction is enabled or NO_REGS.") + +;; Lq/stq validates the address for load/store quad +(define_memory_constraint "wQ" + "Memory operand suitable for the load/store quad instructions" + (match_operand 0 "quad_memory_operand")) ;; Altivec style load/store that ignores the bottom bits of the address (define_memory_constraint "wZ" diff --git 
a/gcc/config/rs6000/crypto.md b/gcc/config/rs6000/crypto.md new file mode 100644 index 000000000000..9f7e4a1b255c --- /dev/null +++ b/gcc/config/rs6000/crypto.md @@ -0,0 +1,101 @@ +;; Cryptographic instructions added in ISA 2.07 +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Michael Meissner (meissner@linux.vnet.ibm.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_c_enum "unspec" + [UNSPEC_VCIPHER + UNSPEC_VNCIPHER + UNSPEC_VCIPHERLAST + UNSPEC_VNCIPHERLAST + UNSPEC_VSBOX + UNSPEC_VSHASIGMA + UNSPEC_VPERMXOR + UNSPEC_VPMSUM]) + +;; Iterator for VPMSUM/VPERMXOR +(define_mode_iterator CR_mode [V16QI V8HI V4SI V2DI]) + +(define_mode_attr CR_char [(V16QI "b") + (V8HI "h") + (V4SI "w") + (V2DI "d")]) + +;; Iterator for VSHASIGMAD/VSHASIGMAW +(define_mode_iterator CR_hash [V4SI V2DI]) + +;; Iterator for the other crypto functions +(define_int_iterator CR_code [UNSPEC_VCIPHER + UNSPEC_VNCIPHER + UNSPEC_VCIPHERLAST + UNSPEC_VNCIPHERLAST]) + +(define_int_attr CR_insn [(UNSPEC_VCIPHER "vcipher") + (UNSPEC_VNCIPHER "vncipher") + (UNSPEC_VCIPHERLAST "vcipherlast") + (UNSPEC_VNCIPHERLAST "vncipherlast")]) + +;; 2 operand crypto instructions +(define_insn "crypto_" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + CR_code))] + 
"TARGET_CRYPTO" + " %0,%1,%2" + [(set_attr "type" "crypto")]) + +(define_insn "crypto_vpmsum" + [(set (match_operand:CR_mode 0 "register_operand" "=v") + (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") + (match_operand:CR_mode 2 "register_operand" "v")] + UNSPEC_VPMSUM))] + "TARGET_CRYPTO" + "vpmsum %0,%1,%2" + [(set_attr "type" "crypto")]) + +;; 3 operand crypto instructions +(define_insn "crypto_vpermxor_" + [(set (match_operand:CR_mode 0 "register_operand" "=v") + (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") + (match_operand:CR_mode 2 "register_operand" "v") + (match_operand:CR_mode 3 "register_operand" "v")] + UNSPEC_VPERMXOR))] + "TARGET_CRYPTO" + "vpermxor %0,%1,%2,%3" + [(set_attr "type" "crypto")]) + +;; 1 operand crypto instruction +(define_insn "crypto_vsbox" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")] + UNSPEC_VSBOX))] + "TARGET_CRYPTO" + "vsbox %0,%1" + [(set_attr "type" "crypto")]) + +;; Hash crypto instructions +(define_insn "crypto_vshasigma" + [(set (match_operand:CR_hash 0 "register_operand" "=v") + (unspec:CR_hash [(match_operand:CR_hash 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_1_operand" "n") + (match_operand:SI 3 "const_0_to_15_operand" "n")] + UNSPEC_VSHASIGMA))] + "TARGET_CRYPTO" + "vshasigma %0,%1,%2,%3" + [(set_attr "type" "crypto")]) diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md index 040bae490013..0c50c782aa9f 100644 --- a/gcc/config/rs6000/dfp.md +++ b/gcc/config/rs6000/dfp.md @@ -29,77 +29,6 @@ ]) -(define_expand "movsd" - [(set (match_operand:SD 0 "nonimmediate_operand" "") - (match_operand:SD 1 "any_operand" ""))] - "TARGET_HARD_FLOAT && TARGET_FPRS" - "{ rs6000_emit_move (operands[0], operands[1], SDmode); DONE; }") - -(define_split - [(set (match_operand:SD 0 "gpc_reg_operand" "") - (match_operand:SD 1 "const_double_operand" ""))] - "reload_completed - && ((GET_CODE (operands[0]) == 
REG && REGNO (operands[0]) <= 31) - || (GET_CODE (operands[0]) == SUBREG - && GET_CODE (SUBREG_REG (operands[0])) == REG - && REGNO (SUBREG_REG (operands[0])) <= 31))" - [(set (match_dup 2) (match_dup 3))] - " -{ - long l; - REAL_VALUE_TYPE rv; - - REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]); - REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l); - - if (! TARGET_POWERPC64) - operands[2] = operand_subword (operands[0], 0, 0, SDmode); - else - operands[2] = gen_lowpart (SImode, operands[0]); - - operands[3] = gen_int_mode (l, SImode); -}") - -(define_insn "movsd_hardfloat" - [(set (match_operand:SD 0 "nonimmediate_operand" "=r,r,m,f,*c*l,!r,*h,!r,!r") - (match_operand:SD 1 "input_operand" "r,m,r,f,r,h,0,G,Fn"))] - "(gpc_reg_operand (operands[0], SDmode) - || gpc_reg_operand (operands[1], SDmode)) - && (TARGET_HARD_FLOAT && TARGET_FPRS)" - "@ - mr %0,%1 - lwz%U1%X1 %0,%1 - stw%U0%X0 %1,%0 - fmr %0,%1 - mt%0 %1 - mf%1 %0 - nop - # - #" - [(set_attr "type" "*,load,store,fp,mtjmpr,mfjmpr,*,*,*") - (set_attr "length" "4,4,4,4,4,4,4,4,8")]) - -(define_insn "movsd_softfloat" - [(set (match_operand:SD 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,r,*h") - (match_operand:SD 1 "input_operand" "r,r,h,m,r,I,L,R,G,Fn,0"))] - "(gpc_reg_operand (operands[0], SDmode) - || gpc_reg_operand (operands[1], SDmode)) - && (TARGET_SOFT_FLOAT || !TARGET_FPRS)" - "@ - mr %0,%1 - mt%0 %1 - mf%1 %0 - lwz%U1%X1 %0,%1 - stw%U0%X0 %1,%0 - li %0,%1 - lis %0,%v1 - la %0,%a1 - # - # - nop" - [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*,*") - (set_attr "length" "4,4,4,4,4,4,4,4,4,8,4")]) - (define_insn "movsd_store" [(set (match_operand:DD 0 "nonimmediate_operand" "=m") (unspec:DD [(match_operand:SD 1 "input_operand" "d")] @@ -108,7 +37,14 @@ || gpc_reg_operand (operands[1], SDmode)) && TARGET_HARD_FLOAT && TARGET_FPRS" "stfd%U0%X0 %1,%0" - [(set_attr "type" "fpstore") + [(set (attr "type") + (if_then_else + (match_test "update_indexed_address_mem (operands[0], VOIDmode)") + (const_string 
"fpstore_ux") + (if_then_else + (match_test "update_address_mem (operands[0], VOIDmode)") + (const_string "fpstore_u") + (const_string "fpstore")))) (set_attr "length" "4")]) (define_insn "movsd_load" @@ -119,7 +55,14 @@ || gpc_reg_operand (operands[1], DDmode)) && TARGET_HARD_FLOAT && TARGET_FPRS" "lfd%U1%X1 %0,%1" - [(set_attr "type" "fpload") + [(set (attr "type") + (if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "fpload_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "fpload_u") + (const_string "fpload")))) (set_attr "length" "4")]) ;; Hardware support for decimal floating point operations. @@ -182,211 +125,6 @@ "fnabs %0,%1" [(set_attr "type" "fp")]) -(define_expand "movdd" - [(set (match_operand:DD 0 "nonimmediate_operand" "") - (match_operand:DD 1 "any_operand" ""))] - "" - "{ rs6000_emit_move (operands[0], operands[1], DDmode); DONE; }") - -(define_split - [(set (match_operand:DD 0 "gpc_reg_operand" "") - (match_operand:DD 1 "const_int_operand" ""))] - "! TARGET_POWERPC64 && reload_completed - && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31) - || (GET_CODE (operands[0]) == SUBREG - && GET_CODE (SUBREG_REG (operands[0])) == REG - && REGNO (SUBREG_REG (operands[0])) <= 31))" - [(set (match_dup 2) (match_dup 4)) - (set (match_dup 3) (match_dup 1))] - " -{ - int endian = (WORDS_BIG_ENDIAN == 0); - HOST_WIDE_INT value = INTVAL (operands[1]); - - operands[2] = operand_subword (operands[0], endian, 0, DDmode); - operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode); -#if HOST_BITS_PER_WIDE_INT == 32 - operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx; -#else - operands[4] = GEN_INT (value >> 32); - operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000); -#endif -}") - -(define_split - [(set (match_operand:DD 0 "gpc_reg_operand" "") - (match_operand:DD 1 "const_double_operand" ""))] - "! 
TARGET_POWERPC64 && reload_completed - && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31) - || (GET_CODE (operands[0]) == SUBREG - && GET_CODE (SUBREG_REG (operands[0])) == REG - && REGNO (SUBREG_REG (operands[0])) <= 31))" - [(set (match_dup 2) (match_dup 4)) - (set (match_dup 3) (match_dup 5))] - " -{ - int endian = (WORDS_BIG_ENDIAN == 0); - long l[2]; - REAL_VALUE_TYPE rv; - - REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]); - REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l); - - operands[2] = operand_subword (operands[0], endian, 0, DDmode); - operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode); - operands[4] = gen_int_mode (l[endian], SImode); - operands[5] = gen_int_mode (l[1 - endian], SImode); -}") - -(define_split - [(set (match_operand:DD 0 "gpc_reg_operand" "") - (match_operand:DD 1 "const_double_operand" ""))] - "TARGET_POWERPC64 && reload_completed - && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31) - || (GET_CODE (operands[0]) == SUBREG - && GET_CODE (SUBREG_REG (operands[0])) == REG - && REGNO (SUBREG_REG (operands[0])) <= 31))" - [(set (match_dup 2) (match_dup 3))] - " -{ - int endian = (WORDS_BIG_ENDIAN == 0); - long l[2]; - REAL_VALUE_TYPE rv; -#if HOST_BITS_PER_WIDE_INT >= 64 - HOST_WIDE_INT val; -#endif - - REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]); - REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l); - - operands[2] = gen_lowpart (DImode, operands[0]); - /* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */ -#if HOST_BITS_PER_WIDE_INT >= 64 - val = ((HOST_WIDE_INT)(unsigned long)l[endian] << 32 - | ((HOST_WIDE_INT)(unsigned long)l[1 - endian])); - - operands[3] = gen_int_mode (val, DImode); -#else - operands[3] = immed_double_const (l[1 - endian], l[endian], DImode); -#endif -}") - -;; Don't have reload use general registers to load a constant. 
First, -;; it might not work if the output operand is the equivalent of -;; a non-offsettable memref, but also it is less efficient than loading -;; the constant into an FP register, since it will probably be used there. -;; The "??" is a kludge until we can figure out a more reasonable way -;; of handling these non-offsettable values. -(define_insn "*movdd_hardfloat32" - [(set (match_operand:DD 0 "nonimmediate_operand" "=!r,??r,m,d,d,m,!r,!r,!r") - (match_operand:DD 1 "input_operand" "r,m,r,d,m,d,G,H,F"))] - "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS - && (gpc_reg_operand (operands[0], DDmode) - || gpc_reg_operand (operands[1], DDmode))" - "* -{ - switch (which_alternative) - { - default: - gcc_unreachable (); - case 0: - case 1: - case 2: - return \"#\"; - case 3: - return \"fmr %0,%1\"; - case 4: - return \"lfd%U1%X1 %0,%1\"; - case 5: - return \"stfd%U0%X0 %1,%0\"; - case 6: - case 7: - case 8: - return \"#\"; - } -}" - [(set_attr "type" "two,load,store,fp,fpload,fpstore,*,*,*") - (set_attr "length" "8,16,16,4,4,4,8,12,16")]) - -(define_insn "*movdd_softfloat32" - [(set (match_operand:DD 0 "nonimmediate_operand" "=r,r,m,r,r,r") - (match_operand:DD 1 "input_operand" "r,m,r,G,H,F"))] - "! TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS) - && (gpc_reg_operand (operands[0], DDmode) - || gpc_reg_operand (operands[1], DDmode))" - "#" - [(set_attr "type" "two,load,store,*,*,*") - (set_attr "length" "8,8,8,8,12,16")]) - -; ld/std require word-aligned displacements -> 'Y' constraint. -; List Y->r and r->Y before r->r for reload. 
-(define_insn "*movdd_hardfloat64_mfpgpr" - [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r,r,d") - (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F,d,r"))] - "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS - && (gpc_reg_operand (operands[0], DDmode) - || gpc_reg_operand (operands[1], DDmode))" - "@ - std%U0%X0 %1,%0 - ld%U1%X1 %0,%1 - mr %0,%1 - fmr %0,%1 - lfd%U1%X1 %0,%1 - stfd%U0%X0 %1,%0 - mt%0 %1 - mf%1 %0 - nop - # - # - # - mftgpr %0,%1 - mffgpr %0,%1" - [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr") - (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16,4,4")]) - -; ld/std require word-aligned displacements -> 'Y' constraint. -; List Y->r and r->Y before r->r for reload. -(define_insn "*movdd_hardfloat64" - [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r") - (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F"))] - "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS - && (gpc_reg_operand (operands[0], DDmode) - || gpc_reg_operand (operands[1], DDmode))" - "@ - std%U0%X0 %1,%0 - ld%U1%X1 %0,%1 - mr %0,%1 - fmr %0,%1 - lfd%U1%X1 %0,%1 - stfd%U0%X0 %1,%0 - mt%0 %1 - mf%1 %0 - nop - # - # - #" - [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*") - (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16")]) - -(define_insn "*movdd_softfloat64" - [(set (match_operand:DD 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h") - (match_operand:DD 1 "input_operand" "Y,r,r,r,h,G,H,F,0"))] - "TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS) - && (gpc_reg_operand (operands[0], DDmode) - || gpc_reg_operand (operands[1], DDmode))" - "@ - ld%U1%X1 %0,%1 - std%U0%X0 %1,%0 - mr %0,%1 - mt%0 %1 - mf%1 %0 - # - # - # - nop" - [(set_attr "type" "load,store,*,mtjmpr,mfjmpr,*,*,*,*") - (set_attr "length" "4,4,4,4,4,8,12,16,4")]) - (define_expand "negtd2" [(set (match_operand:TD 0 "gpc_reg_operand" 
"") (neg:TD (match_operand:TD 1 "gpc_reg_operand" "")))] @@ -423,27 +161,6 @@ "fnabs %0,%1" [(set_attr "type" "fp")]) -(define_expand "movtd" - [(set (match_operand:TD 0 "general_operand" "") - (match_operand:TD 1 "any_operand" ""))] - "TARGET_HARD_FLOAT && TARGET_FPRS" - "{ rs6000_emit_move (operands[0], operands[1], TDmode); DONE; }") - -; It's important to list the Y->r and r->Y moves before r->r because -; otherwise reload, given m->r, will try to pick r->r and reload it, -; which doesn't make progress. -(define_insn_and_split "*movtd_internal" - [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r") - (match_operand:TD 1 "input_operand" "d,m,d,r,YGHF,r"))] - "TARGET_HARD_FLOAT && TARGET_FPRS - && (gpc_reg_operand (operands[0], TDmode) - || gpc_reg_operand (operands[1], TDmode))" - "#" - "&& reload_completed" - [(pc)] -{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; } - [(set_attr "length" "8,8,8,20,20,16")]) - ;; Hardware support for decimal floating point operations. (define_insn "extendddtd2" diff --git a/gcc/config/rs6000/driver-rs6000.c b/gcc/config/rs6000/driver-rs6000.c index 2f4d1ead8f3c..1bc8ace2a5fb 100644 --- a/gcc/config/rs6000/driver-rs6000.c +++ b/gcc/config/rs6000/driver-rs6000.c @@ -167,7 +167,7 @@ elf_platform (void) if (fd != -1) { - char buf[1024]; + static char buf[1024]; ElfW(auxv_t) *av; ssize_t n; diff --git a/gcc/config/rs6000/power8.md b/gcc/config/rs6000/power8.md new file mode 100644 index 000000000000..83bf71974830 --- /dev/null +++ b/gcc/config/rs6000/power8.md @@ -0,0 +1,373 @@ +;; Scheduling description for IBM POWER8 processor. +;; Copyright (C) 2013 Free Software Foundation, Inc. +;; +;; Contributed by Pat Haugen (pthaugen@us.ibm.com). + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "power8fxu,power8lsu,power8vsu,power8misc") + +(define_cpu_unit "fxu0_power8,fxu1_power8" "power8fxu") +(define_cpu_unit "lu0_power8,lu1_power8" "power8lsu") +(define_cpu_unit "lsu0_power8,lsu1_power8" "power8lsu") +(define_cpu_unit "vsu0_power8,vsu1_power8" "power8vsu") +(define_cpu_unit "bpu_power8,cru_power8" "power8misc") +(define_cpu_unit "du0_power8,du1_power8,du2_power8,du3_power8,du4_power8,\ + du5_power8,du6_power8" "power8misc") + + +; Dispatch group reservations +(define_reservation "DU_any_power8" + "du0_power8|du1_power8|du2_power8|du3_power8|du4_power8|\ + du5_power8") + +; 2-way Cracked instructions go in slots 0-1 +; (can also have a second in slots 3-4 if insns are adjacent) +(define_reservation "DU_cracked_power8" + "du0_power8+du1_power8") + +; Insns that are first in group +(define_reservation "DU_first_power8" + "du0_power8") + +; Insns that are first and last in group +(define_reservation "DU_both_power8" + "du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+\ + du5_power8+du6_power8") + +; Dispatch slots are allocated in order conforming to program order. 
+(absence_set "du0_power8" "du1_power8,du2_power8,du3_power8,du4_power8,\ + du5_power8,du6_power8") +(absence_set "du1_power8" "du2_power8,du3_power8,du4_power8,du5_power8,\ + du6_power8") +(absence_set "du2_power8" "du3_power8,du4_power8,du5_power8,du6_power8") +(absence_set "du3_power8" "du4_power8,du5_power8,du6_power8") +(absence_set "du4_power8" "du5_power8,du6_power8") +(absence_set "du5_power8" "du6_power8") + + +; Execution unit reservations +(define_reservation "FXU_power8" + "fxu0_power8|fxu1_power8") + +(define_reservation "LU_power8" + "lu0_power8|lu1_power8") + +(define_reservation "LSU_power8" + "lsu0_power8|lsu1_power8") + +(define_reservation "LU_or_LSU_power8" + "lu0_power8|lu1_power8|lsu0_power8|lsu1_power8") + +(define_reservation "VSU_power8" + "vsu0_power8|vsu1_power8") + + +; LS Unit +(define_insn_reservation "power8-load" 3 + (and (eq_attr "type" "load") + (eq_attr "cpu" "power8")) + "DU_any_power8,LU_or_LSU_power8") + +(define_insn_reservation "power8-load-update" 3 + (and (eq_attr "type" "load_u,load_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_or_LSU_power8+FXU_power8") + +(define_insn_reservation "power8-load-ext" 3 + (and (eq_attr "type" "load_ext") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_or_LSU_power8,FXU_power8") + +(define_insn_reservation "power8-load-ext-update" 3 + (and (eq_attr "type" "load_ext_u,load_ext_ux") + (eq_attr "cpu" "power8")) + "DU_both_power8,LU_or_LSU_power8+FXU_power8,FXU_power8") + +(define_insn_reservation "power8-fpload" 5 + (and (eq_attr "type" "fpload,vecload") + (eq_attr "cpu" "power8")) + "DU_any_power8,LU_power8") + +(define_insn_reservation "power8-fpload-update" 5 + (and (eq_attr "type" "fpload_u,fpload_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_power8+FXU_power8") + +(define_insn_reservation "power8-store" 5 ; store-forwarding latency + (and (eq_attr "type" "store,store_u") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+LU_power8") + +(define_insn_reservation 
"power8-store-update-indexed" 5 + (and (eq_attr "type" "store_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-fpstore" 5 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-fpstore-update" 5 + (and (eq_attr "type" "fpstore_u,fpstore_ux") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-vecstore" 5 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-larx" 3 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power8")) + "DU_both_power8,LU_or_LSU_power8") + +(define_insn_reservation "power8-stcx" 10 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power8")) + "DU_both_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-sync" 1 + (and (eq_attr "type" "sync,isync") + (eq_attr "cpu" "power8")) + "DU_both_power8,LSU_power8") + + +; FX Unit +(define_insn_reservation "power8-1cyc" 1 + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,exts,isel") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 2 "power8-1cyc" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") +; "power8-load,power8-load-update,power8-load-ext,\ +; power8-load-ext-update,power8-fpload,power8-fpload-update,\ +; power8-store,power8-store-update,power8-store-update-indexed,\ +; power8-fpstore,power8-fpstore-update,power8-vecstore,\ +; power8-larx,power8-stcx") + +(define_insn_reservation "power8-2cyc" 2 + (and (eq_attr "type" "cntlz,popcnt") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +(define_insn_reservation "power8-two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power8")) + "DU_any_power8+DU_any_power8,FXU_power8,FXU_power8") + 
+(define_insn_reservation "power8-three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power8")) + "DU_any_power8+DU_any_power8+DU_any_power8,FXU_power8,FXU_power8,FXU_power8") + +; cmp - Normal compare insns +(define_insn_reservation "power8-cmp" 2 + (and (eq_attr "type" "cmp") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; fast_compare : add./and./nor./etc +(define_insn_reservation "power8-fast-compare" 2 + (and (eq_attr "type" "fast_compare") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; compare : rldicl./exts./etc +; delayed_compare : rlwinm./slwi./etc +; var_delayed_compare : rlwnm./slw./etc +(define_insn_reservation "power8-compare" 2 + (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,FXU_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 3 "power8-fast-compare,power8-compare" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") + +; 5 cycle CR latency +(define_bypass 5 "power8-fast-compare,power8-compare" + "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch") + +(define_insn_reservation "power8-mul" 4 + (and (eq_attr "type" "imul,imul2,imul3,lmul") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +(define_insn_reservation "power8-mul-compare" 4 + (and (eq_attr "type" "imul_compare,lmul_compare") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 5 "power8-mul,power8-mul-compare" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") + +; 7 cycle CR latency +(define_bypass 7 "power8-mul,power8-mul-compare" + "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch") + +; FXU divides are not pipelined +(define_insn_reservation "power8-idiv" 37 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,fxu0_power8*37|fxu1_power8*37") + 
+(define_insn_reservation "power8-ldiv" 68 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,fxu0_power8*68|fxu1_power8*68") + +(define_insn_reservation "power8-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "power8")) + "DU_first_power8,FXU_power8") + +; Should differentiate between 1 cr field and > 1 since mtocrf is not microcode +(define_insn_reservation "power8-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power8")) + "DU_both_power8,FXU_power8") + + +; CR Unit +(define_insn_reservation "power8-mfjmpr" 5 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8+FXU_power8") + +(define_insn_reservation "power8-crlogical" 3 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8") + +(define_insn_reservation "power8-mfcr" 5 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power8")) + "DU_both_power8,cru_power8") + +(define_insn_reservation "power8-mfcrf" 3 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8") + + +; BR Unit +; Branches take dispatch slot 7, but reserve any remaining prior slots to +; prevent other insns from grabbing them once this is assigned. 
+(define_insn_reservation "power8-branch" 3 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power8")) + "(du6_power8\ + |du5_power8+du6_power8\ + |du4_power8+du5_power8+du6_power8\ + |du3_power8+du4_power8+du5_power8+du6_power8\ + |du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\ + |du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\ + |du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+\ + du6_power8),bpu_power8") + +; Branch updating LR/CTR feeding mf[lr|ctr] +(define_bypass 4 "power8-branch" "power8-mfjmpr") + + +; VS Unit (includes FP/VSX/VMX/DFP/Crypto) +(define_insn_reservation "power8-fp" 6 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +; Additional 3 cycles for any CR result +(define_bypass 9 "power8-fp" "power8-crlogical,power8-mfcr*,power8-branch") + +(define_insn_reservation "power8-fpcompare" 8 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-sdiv" 27 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-sqrt" 32 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-dsqrt" 44 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecsimple" 2 + (and (eq_attr "type" "vecperm,vecsimple,veccmp") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecnormal" 6 + (and (eq_attr "type" "vecfloat,vecdouble") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_bypass 7 "power8-vecnormal" + "power8-vecsimple,power8-veccomplex,power8-fpstore*,\ + power8-vecstore") + +(define_insn_reservation 
"power8-veccomplex" 7 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecfdiv" 25 + (and (eq_attr "type" "vecfdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecdiv" 31 + (and (eq_attr "type" "vecdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-mffgpr" 5 + (and (eq_attr "type" "mffgpr") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-mftgpr" 6 + (and (eq_attr "type" "mftgpr") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-crypto" 7 + (and (eq_attr "type" "crypto") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 07944db10895..c69f5d6b2609 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -166,6 +166,11 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) +;; Match op = 0..15 +(define_predicate "const_0_to_15_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) + ;; Return 1 if op is a register that is not special. (define_predicate "gpc_reg_operand" (match_operand 0 "register_operand") @@ -182,9 +187,68 @@ if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op))) return 1; + if (TARGET_VSX && VSX_REGNO_P (REGNO (op))) + return 1; + return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op)); }) +;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't +;; allow floating point or vector registers. 
+(define_predicate "int_reg_operand" + (match_operand 0 "register_operand") +{ + if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode)) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return INT_REGNO_P (REGNO (op)); +}) + +;; Like int_reg_operand, but only return true for base registers +(define_predicate "base_reg_operand" + (match_operand 0 "int_reg_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + return (REGNO (op) != FIRST_GPR_REGNO); +}) + +;; Return 1 if op is a general purpose register that is an even register +;; which is suitable for a load/store quad operation +(define_predicate "quad_int_reg_operand" + (match_operand 0 "register_operand") +{ + HOST_WIDE_INT r; + + if (!TARGET_QUAD_MEMORY) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + r = REGNO (op); + if (r >= FIRST_PSEUDO_REGISTER) + return 1; + + return (INT_REGNO_P (r) && ((r & 1) == 0)); +}) + ;; Return 1 if op is a register that is a condition register field. (define_predicate "cc_reg_operand" (match_operand 0 "register_operand") @@ -315,6 +379,11 @@ && CONST_DOUBLE_HIGH (op) == 0") (match_operand 0 "gpc_reg_operand")))) +;; Like reg_or_logical_cint_operand, but allow vsx registers +(define_predicate "vsx_reg_or_cint_operand" + (ior (match_operand 0 "vsx_register_operand") + (match_operand 0 "reg_or_logical_cint_operand"))) + ;; Return 1 if operand is a CONST_DOUBLE that can be set in a register ;; with no more than one instruction per word. (define_predicate "easy_fp_constant" @@ -333,6 +402,11 @@ && mode != DImode) return 1; + /* The constant 0.0 is easy under VSX. 
*/ + if ((mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode) + && VECTOR_UNIT_VSX_P (DFmode) && op == CONST0_RTX (mode)) + return 1; + if (DECIMAL_FLOAT_MODE_P (mode)) return 0; @@ -521,6 +595,54 @@ (and (match_operand 0 "memory_operand") (match_test "offsettable_nonstrict_memref_p (op)"))) +;; Return 1 if the operand is suitable for load/store quad memory. +(define_predicate "quad_memory_operand" + (match_code "mem") +{ + rtx addr, op0, op1; + int ret; + + if (!TARGET_QUAD_MEMORY) + ret = 0; + + else if (!memory_operand (op, mode)) + ret = 0; + + else if (GET_MODE_SIZE (GET_MODE (op)) != 16) + ret = 0; + + else if (MEM_ALIGN (op) < 128) + ret = 0; + + else + { + addr = XEXP (op, 0); + if (int_reg_operand (addr, Pmode)) + ret = 1; + + else if (GET_CODE (addr) != PLUS) + ret = 0; + + else + { + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + ret = (int_reg_operand (op0, Pmode) + && GET_CODE (op1) == CONST_INT + && IN_RANGE (INTVAL (op1), -32768, 32767) + && (INTVAL (op1) & 15) == 0); + } + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false"); + debug_rtx (op); + } + + return ret; +}) + ;; Return 1 if the operand is an indexed or indirect memory operand. (define_predicate "indexed_or_indirect_operand" (match_code "mem") @@ -535,6 +657,19 @@ return indexed_or_indirect_address (op, mode); }) +;; Like indexed_or_indirect_operand, but also allow a GPR register if direct +;; moves are supported. 
+(define_predicate "reg_or_indexed_operand" + (match_code "mem,reg") +{ + if (MEM_P (op)) + return indexed_or_indirect_operand (op, mode); + else if (TARGET_DIRECT_MOVE) + return register_operand (op, mode); + return + 0; +}) + ;; Return 1 if the operand is an indexed or indirect memory operand with an ;; AND -16 in it, used to recognize when we need to switch to Altivec loads ;; to realign loops instead of VSX (altivec silently ignores the bottom bits, @@ -560,6 +695,28 @@ && REG_P (XEXP (op, 1)))") (match_operand 0 "address_operand"))) +;; Return 1 if the operand is an index-form address. +(define_special_predicate "indexed_address" + (match_test "(GET_CODE (op) == PLUS + && REG_P (XEXP (op, 0)) + && REG_P (XEXP (op, 1)))")) + +;; Return 1 if the operand is a MEM with an update-form address. This may +;; also include update-indexed form. +(define_special_predicate "update_address_mem" + (match_test "(MEM_P (op) + && (GET_CODE (XEXP (op, 0)) == PRE_INC + || GET_CODE (XEXP (op, 0)) == PRE_DEC + || GET_CODE (XEXP (op, 0)) == PRE_MODIFY))")) + +;; Return 1 if the operand is a MEM with an update-indexed-form address.  Note +;; that PRE_INC/PRE_DEC will always be non-indexed (i.e. non X-form) since the +;; increment is based on the mode size and will therefore always be a const. +(define_special_predicate "update_indexed_address_mem" + (match_test "(MEM_P (op) + && GET_CODE (XEXP (op, 0)) == PRE_MODIFY + && indexed_address (XEXP (XEXP (op, 0), 1), mode))")) + ;; Used for the destination of the fix_truncdfsi2 expander. ;; If stfiwx will be used, the result goes to memory; otherwise, ;; we're going to emit a store and a load of a subreg, so the dest is a @@ -1534,3 +1691,99 @@ return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL; }) + +;; Match the first insn (addis) in fusing the combination of addis and loads to +;; GPR registers on power8. 
+(define_predicate "fusion_gpr_addis" + (match_code "const_int,high,plus") +{ + HOST_WIDE_INT value; + rtx int_const; + + if (GET_CODE (op) == HIGH) + return 1; + + if (CONST_INT_P (op)) + int_const = op; + + else if (GET_CODE (op) == PLUS + && base_reg_operand (XEXP (op, 0), Pmode) + && CONST_INT_P (XEXP (op, 1))) + int_const = XEXP (op, 1); + + else + return 0; + + /* Power8 currently will only do the fusion if the top 11 bits of the addis + value are all 1's or 0's. */ + value = INTVAL (int_const); + if ((value & (HOST_WIDE_INT)0xffff) != 0) + return 0; + + if ((value & (HOST_WIDE_INT)0xffff0000) == 0) + return 0; + + return (IN_RANGE (value >> 16, -32, 31)); +}) + +;; Match the second insn (lbz, lhz, lwz, ld) in fusing the combination of addis +;; and loads to GPR registers on power8. +(define_predicate "fusion_gpr_mem_load" + (match_code "mem,sign_extend,zero_extend") +{ + rtx addr; + + /* Handle sign/zero extend. */ + if (GET_CODE (op) == ZERO_EXTEND + || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND)) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + if (!MEM_P (op)) + return 0; + + switch (mode) + { + case QImode: + case HImode: + case SImode: + break; + + case DImode: + if (!TARGET_POWERPC64) + return 0; + break; + + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) == PLUS) + { + rtx base = XEXP (addr, 0); + rtx offset = XEXP (addr, 1); + + return (base_reg_operand (base, GET_MODE (base)) + && satisfies_constraint_I (offset)); + } + + else if (GET_CODE (addr) == LO_SUM) + { + rtx base = XEXP (addr, 0); + rtx offset = XEXP (addr, 1); + + if (!base_reg_operand (base, GET_MODE (base))) + return 0; + + else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); + + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); + } + + return 0; +}) diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 
a545fe3e448e..971495ff47b3 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -30,7 +30,7 @@ RS6000_BUILTIN_A -- ABS builtins RS6000_BUILTIN_D -- DST builtins RS6000_BUILTIN_E -- SPE EVSEL builtins. - RS6000_BUILTIN_P -- Altivec and VSX predicate builtins + RS6000_BUILTIN_P -- Altivec, VSX, ISA 2.07 vector predicate builtins RS6000_BUILTIN_Q -- Paired floating point VSX predicate builtins RS6000_BUILTIN_S -- SPE predicate builtins RS6000_BUILTIN_X -- special builtins @@ -301,6 +301,108 @@ | RS6000_BTC_SPECIAL), \ CODE_FOR_nothing) /* ICODE */ +/* ISA 2.07 (power8) vector convenience macros. */ +/* For the instructions that are encoded as altivec instructions use + __builtin_altivec_ as the builtin name. */ +#define BU_P8V_AV_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* For the instructions encoded as VSX instructions use __builtin_vsx as the + builtin name. 
*/ +#define BU_P8V_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P8V_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +/* Crypto convenience macros. */ +#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define 
BU_CRYPTO_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + /* SPE convenience macros. */ #define BU_SPE_1(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_1 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ @@ -1012,7 +1114,7 @@ BU_VSX_1 (XVTSQRTSP_FG, "xvtsqrtsp_fg", CONST, vsx_tsqrtv4sf2_fg) BU_VSX_1 (XVRESP, "xvresp", CONST, vsx_frev4sf2) BU_VSX_1 (XSCVDPSP, "xscvdpsp", CONST, vsx_xscvdpsp) -BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvdpsp) +BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvspdp) BU_VSX_1 (XVCVDPSP, "xvcvdpsp", CONST, vsx_xvcvdpsp) BU_VSX_1 (XVCVSPDP, "xvcvspdp", CONST, vsx_xvcvspdp) BU_VSX_1 (XSTSQRTDP_FE, "xstsqrtdp_fe", CONST, vsx_tsqrtdf2_fe) @@ -1052,9 +1154,9 @@ BU_VSX_1 (XVRSPIZ, "xvrspiz", CONST, vsx_btruncv4sf2) BU_VSX_1 (XSRDPI, "xsrdpi", CONST, vsx_xsrdpi) BU_VSX_1 (XSRDPIC, "xsrdpic", CONST, vsx_xsrdpic) -BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, vsx_floordf2) -BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, vsx_ceildf2) -BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, vsx_btruncdf2) +BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, floordf2) +BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, ceildf2) +BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, btruncdf2) /* VSX predicate functions. */ BU_VSX_P (XVCMPEQSP_P, "xvcmpeqsp_p", CONST, vector_eq_v4sf_p) @@ -1131,6 +1233,139 @@ BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw") /* VSX builtins that are handled as special cases. */ BU_VSX_OVERLOAD_X (LD, "ld") BU_VSX_OVERLOAD_X (ST, "st") + +/* 1 argument VSX instructions added in ISA 2.07. 
*/ +BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn) +BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn) + +/* 1 argument altivec instructions added in ISA 2.07. */ +BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2) +BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw) +BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw) +BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2) +BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2) +BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2) +BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2) +BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2) +BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2) +BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2) +BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2) +BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd) + +/* 2 argument altivec instructions added in ISA 2.07. */ +BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3) +BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3) +BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3) +BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3) +BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3) +BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew) +BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow) +BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum) +BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss) +BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus) +BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpkswus) +BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3) +BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3) +BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3) +BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3) +BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3) + +BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3) +BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3) +BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3) +BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3) +BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3) +BU_P8V_AV_2 
(EQV_V2DF, "eqv_v2df", CONST, eqvv2df3) + +BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3) +BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3) +BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3) +BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3) +BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3) +BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3) + +BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3) +BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3) +BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3) +BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3) +BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3) +BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3) + +/* Vector comparison instructions added in ISA 2.07. */ +BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di) +BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di) +BU_P8V_AV_2 (VCMPGTUD, "vcmpgtud", CONST, vector_gtuv2di) + +/* Vector comparison predicate instructions added in ISA 2.07. */ +BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p) +BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p) +BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p) + +/* ISA 2.07 vector overloaded 1 argument functions. */ +BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw") +BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw") +BU_P8V_OVERLOAD_1 (VCLZ, "vclz") +BU_P8V_OVERLOAD_1 (VCLZB, "vclzb") +BU_P8V_OVERLOAD_1 (VCLZH, "vclzh") +BU_P8V_OVERLOAD_1 (VCLZW, "vclzw") +BU_P8V_OVERLOAD_1 (VCLZD, "vclzd") +BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt") +BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb") +BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth") +BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw") +BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd") +BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd") + +/* ISA 2.07 vector overloaded 2 argument functions. 
*/ +BU_P8V_OVERLOAD_2 (EQV, "eqv") +BU_P8V_OVERLOAD_2 (NAND, "nand") +BU_P8V_OVERLOAD_2 (ORC, "orc") +BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm") +BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd") +BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud") +BU_P8V_OVERLOAD_2 (VMINSD, "vminsd") +BU_P8V_OVERLOAD_2 (VMINUD, "vminud") +BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew") +BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow") +BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss") +BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus") +BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum") +BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus") +BU_P8V_OVERLOAD_2 (VRLD, "vrld") +BU_P8V_OVERLOAD_2 (VSLD, "vsld") +BU_P8V_OVERLOAD_2 (VSRAD, "vsrad") +BU_P8V_OVERLOAD_2 (VSRD, "vsrd") +BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm") + + +/* 1 argument crypto functions. */ +BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox) + +/* 2 argument crypto functions. */ +BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher) +BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast) +BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher) +BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast) +BU_CRYPTO_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_CRYPTO_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_CRYPTO_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_CRYPTO_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) + +/* 3 argument crypto functions. */ +BU_CRYPTO_3 (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di) +BU_CRYPTO_3 (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si) +BU_CRYPTO_3 (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi) +BU_CRYPTO_3 (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi) +BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw) +BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad) + +/* 2 argument crypto overloaded functions. */ +BU_CRYPTO_OVERLOAD_2 (VPMSUM, "vpmsum") + +/* 3 argument crypto overloaded functions. 
*/ +BU_CRYPTO_OVERLOAD_3 (VPERMXOR, "vpermxor") +BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma") + /* 3 argument paired floating point builtins. */ BU_PAIRED_3 (MSUB, "msub", FP, fmsv2sf4) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index a4f66ba8f1bd..562fca3724a6 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -315,6 +315,8 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X"); if ((flags & OPTION_MASK_POPCNTD) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); + if ((flags & OPTION_MASK_DIRECT_MOVE) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8"); if ((flags & OPTION_MASK_SOFT_FLOAT) != 0) rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT"); if ((flags & OPTION_MASK_RECIP_PRECISION) != 0) @@ -331,6 +333,10 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, } if ((flags & OPTION_MASK_VSX) != 0) rs6000_define_or_undefine_macro (define_p, "__VSX__"); + if ((flags & OPTION_MASK_P8_VECTOR) != 0) + rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__"); + if ((flags & OPTION_MASK_CRYPTO) != 0) + rs6000_define_or_undefine_macro (define_p, "__CRYPTO__"); /* options from the builtin masks. 
*/ if ((bu_mask & RS6000_BTM_SPE) != 0) @@ -505,6 +511,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABS, P8V_BUILTIN_ABS_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP, @@ -577,12 +585,24 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX, RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX, @@ -601,6 +621,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { 
ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, @@ -651,6 +675,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP, @@ -937,6 +973,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, 
P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP, @@ -975,6 +1015,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP, @@ -1021,6 +1065,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTSD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP, @@ -1418,6 +1466,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW, 
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP, @@ -1604,6 +1664,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP, RS6000_BTI_V4SF, 
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP, @@ -1786,6 +1858,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, @@ -1812,6 +1890,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUS, ALTIVEC_BUILTIN_VPKUWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKUDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHSS, ALTIVEC_BUILTIN_VPKSHSS, RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUHUS, ALTIVEC_BUILTIN_VPKUHUS, @@ -1824,6 +1906,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, + 
RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS, @@ -1844,6 +1928,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, @@ -1868,6 +1956,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP, @@ -2032,6 +2124,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 
RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, @@ -2056,6 +2152,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, @@ -2196,6 +2296,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP, @@ -3327,6 +3439,20 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P, @@ -3372,11 +3498,455 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, 
RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + /* Power8 vector overloaded functions. 
*/ + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, 
RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { 
P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { 
P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VMINSD, 
P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, 
RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + + { 
P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, 0, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, 0, 0, 0 }, + + /* Crypto builtins. */ + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAW, + RS6000_BTI_unsigned_V4SI, 
RS6000_BTI_unsigned_V4SI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 } }; @@ -3824,7 +4394,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, && (desc->op2 == RS6000_BTI_NOT_OPAQUE || rs6000_builtin_type_compatible (types[1], desc->op2)) && (desc->op3 == RS6000_BTI_NOT_OPAQUE - || rs6000_builtin_type_compatible (types[2], desc->op3))) + || rs6000_builtin_type_compatible (types[2], desc->op3)) + && rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE) return altivec_build_resolved_builtin (args, n, desc); bad: diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 3f17c6f2395f..93714d2f1dfc 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -28,7 +28,7 @@ ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel, fre, fsqrt, etc. were no longer documented as optional. Group masks by server and embedded. */ -#define ISA_2_5_MASKS_EMBEDDED (ISA_2_2_MASKS \ +#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS \ | OPTION_MASK_CMPB \ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_PPC_GFXOPT \ @@ -38,12 +38,22 @@ /* For ISA 2.06, don't add ISEL, since in general it isn't a win, but altivec is a win so enable it. */ + /* OPTION_MASK_VSX_TIMODE should be set, but disable it for now until + PR 58587 is fixed. */ #define ISA_2_6_MASKS_EMBEDDED (ISA_2_5_MASKS_EMBEDDED | OPTION_MASK_POPCNTD) #define ISA_2_6_MASKS_SERVER (ISA_2_5_MASKS_SERVER \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_ALTIVEC \ | OPTION_MASK_VSX) +/* For now, don't provide an embedded version of ISA 2.07. 
*/ +#define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_CRYPTO \ + | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_QUAD_MEMORY) + #define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) /* Deal with ports that do not have -mstrict-align. */ @@ -60,7 +70,9 @@ /* Mask of all options to set the default isa flags based on -mcpu=. */ #define POWERPC_MASKS (OPTION_MASK_ALTIVEC \ | OPTION_MASK_CMPB \ + | OPTION_MASK_CRYPTO \ | OPTION_MASK_DFP \ + | OPTION_MASK_DIRECT_MOVE \ | OPTION_MASK_DLMZB \ | OPTION_MASK_FPRND \ | OPTION_MASK_ISEL \ @@ -68,15 +80,19 @@ | OPTION_MASK_MFPGPR \ | OPTION_MASK_MULHW \ | OPTION_MASK_NO_UPDATE \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ | OPTION_MASK_POPCNTB \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_POWERPC64 \ | OPTION_MASK_PPC_GFXOPT \ | OPTION_MASK_PPC_GPOPT \ + | OPTION_MASK_QUAD_MEMORY \ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_SOFT_FLOAT \ | OPTION_MASK_STRICT_ALIGN_OPTIONAL \ - | OPTION_MASK_VSX) + | OPTION_MASK_VSX \ + | OPTION_MASK_VSX_TIMODE) #endif @@ -166,10 +182,7 @@ RS6000_CPU ("power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD | MASK_VSX | MASK_RECIP_PRECISION) -RS6000_CPU ("power8", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ - POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF - | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - | MASK_VSX | MASK_RECIP_PRECISION) +RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0) RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64) RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64) diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index 
bc18f8a168c2..5124e1665d4f 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -41,3 +41,8 @@ VECTOR_MODE (INT, DI, 1); VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ + +/* Replacement for TImode that only is allowed in GPRs. We also use PTImode + for quad memory atomic operations to force getting an even/odd register + combination. */ +PARTIAL_INT_MODE (TI); diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h index fc843fd19cab..e143a4ca2037 100644 --- a/gcc/config/rs6000/rs6000-opts.h +++ b/gcc/config/rs6000/rs6000-opts.h @@ -59,7 +59,8 @@ enum processor_type PROCESSOR_POWER7, PROCESSOR_CELL, PROCESSOR_PPCA2, - PROCESSOR_TITAN + PROCESSOR_TITAN, + PROCESSOR_POWER8 }; /* FP processor type. */ @@ -131,11 +132,14 @@ enum rs6000_cmodel { CMODEL_LARGE }; -/* Describe which vector unit to use for a given machine mode. */ +/* Describe which vector unit to use for a given machine mode. The + VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and + P8_VECTOR are contiguous. 
*/ enum rs6000_vector { VECTOR_NONE, /* Type is not a vector or not supported */ VECTOR_ALTIVEC, /* Use altivec for vector processing */ VECTOR_VSX, /* Use VSX for vector processing */ + VECTOR_P8_VECTOR, /* Use ISA 2.07 VSX for vector processing */ VECTOR_PAIRED, /* Use paired floating point for vectors */ VECTOR_SPE, /* Use SPE for vector processing */ VECTOR_OTHER /* Some other vector unit */ diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d9bcf1a41ede..5c315470f34c 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -50,6 +50,7 @@ extern rtx rs6000_got_register (rtx); extern rtx find_addr_reg (rtx); extern rtx gen_easy_altivec_constant (rtx); extern const char *output_vec_const_move (rtx *); +extern const char *rs6000_output_move_128bit (rtx *); extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); @@ -70,6 +71,11 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx); extern int registers_ok_for_quad_peep (rtx, rtx); extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); +extern bool direct_move_p (rtx, rtx); +extern bool quad_load_store_p (rtx, rtx); +extern bool fusion_gpr_load_p (rtx *, bool); +extern void expand_fusion_gpr_load (rtx *); +extern const char *emit_fusion_gpr_load (rtx *); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, @@ -135,6 +141,7 @@ extern rtx rs6000_address_for_fpconvert (rtx); extern rtx rs6000_address_for_altivec (rtx); extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool); extern int rs6000_loop_align (rtx); +extern void rs6000_split_logical (rtx [], enum rtx_code, bool, bool, bool, rtx); #endif /* RTX_CODE */ #ifdef TREE_CODE diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 
0325870af97c..13333148411b 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -189,9 +189,6 @@ unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; /* Map register number to register class. */ enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER]; -/* Reload functions based on the type and the vector unit. */ -static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2]; - static int dbg_cost_ctrl; /* Built in types. */ @@ -289,6 +286,105 @@ static struct don't link in rs6000-c.c, so we can't call it directly. */ void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT); +/* Simplfy register classes into simpler classifications. We assume + GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range + check for standard register classes (gpr/floating/altivec/vsx) and + floating/vector classes (float/altivec/vsx). */ + +enum rs6000_reg_type { + NO_REG_TYPE, + PSEUDO_REG_TYPE, + GPR_REG_TYPE, + VSX_REG_TYPE, + ALTIVEC_REG_TYPE, + FPR_REG_TYPE, + SPR_REG_TYPE, + CR_REG_TYPE, + SPE_ACC_TYPE, + SPEFSCR_REG_TYPE +}; + +/* Map register class to register type. */ +static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; + +/* First/last register type for the 'normal' register types (i.e. general + purpose, floating point, altivec, and VSX registers). */ +#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE) + +#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE) + + +/* Register classes we care about in secondary reload or go if legitimate + address. We only need to worry about GPR, FPR, and Altivec registers here, + along an ANY field that is the OR of the 3 register classes. */ + +enum rs6000_reload_reg_type { + RELOAD_REG_GPR, /* General purpose registers. */ + RELOAD_REG_FPR, /* Traditional floating point regs. */ + RELOAD_REG_VMX, /* Altivec (VMX) registers. */ + RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. 
*/ + N_RELOAD_REG +}; + +/* For setting up register classes, loop through the 3 register classes mapping + into real registers, and skip the ANY class, which is just an OR of the + bits. */ +#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR +#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX + +/* Map reload register type to a register in the register class. */ +struct reload_reg_map_type { + const char *name; /* Register class name. */ + int reg; /* Register in the register class. */ +}; + +static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = { + { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */ + { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */ + { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */ + { "Any", -1 }, /* RELOAD_REG_ANY. */ +}; + +/* Mask bits for each register class, indexed per mode. Historically the + compiler has been more restrictive which types can do PRE_MODIFY instead of + PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */ +typedef unsigned char addr_mask_type; + +#define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */ +#define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */ +#define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */ +#define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */ +#define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */ +#define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */ + +/* Register type masks based on the type, of valid addressing modes. */ +struct rs6000_reg_addr { + enum insn_code reload_load; /* INSN to reload for loading. */ + enum insn_code reload_store; /* INSN to reload for storing. */ + enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */ + enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */ + enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */ + addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. 
*/ +}; + +static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES]; + +/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */ +static inline bool +mode_supports_pre_incdec_p (enum machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC) + != 0); +} + +/* Helper function to say whether a mode supports PRE_MODIFY. */ +static inline bool +mode_supports_pre_modify_p (enum machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY) + != 0); +} + /* Target cpu costs. */ @@ -828,6 +924,25 @@ struct processor_costs power7_cost = { 12, /* prefetch streams */ }; +/* Instruction costs on POWER8 processors. */ +static const +struct processor_costs power8_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (14), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 12, /* prefetch streams */ +}; + /* Instruction costs on POWER A2 processors. */ static const struct processor_costs ppca2_cost = { @@ -1020,6 +1135,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *, static void rs6000_print_builtin_options (FILE *, int, const char *, HOST_WIDE_INT); +static enum rs6000_reg_type register_to_reg_type (rtx, bool *); +static bool rs6000_secondary_reload_move (enum rs6000_reg_type, + enum rs6000_reg_type, + enum machine_mode, + secondary_reload_info *, + bool); + /* Hash table stuff for keeping track of TOC entries. */ struct GTY(()) toc_hash_struct @@ -1513,8 +1635,9 @@ rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode) { unsigned HOST_WIDE_INT reg_size; + /* TF/TD modes are special in that they always take 2 registers. 
*/ if (FP_REGNO_P (regno)) - reg_size = (VECTOR_MEM_VSX_P (mode) + reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode) ? UNITS_PER_VSX_WORD : UNITS_PER_FP_WORD); @@ -1546,16 +1669,38 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) { int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; + /* PTImode can only go in GPRs. Quad word memory operations require even/odd + register combinations, and use PTImode where we need to deal with quad + word memory operations. Don't allow quad words in the argument or frame + pointer registers, just registers 0..31. */ + if (mode == PTImode) + return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && ((regno & 1) == 0)); + /* VSX registers that overlap the FPR registers are larger than for non-VSX implementations. Don't allow an item to be split between a FP register - and an Altivec register. */ - if (VECTOR_MEM_VSX_P (mode)) + and an Altivec register. Allow TImode in all VSX registers if the user + asked for it. */ + if (TARGET_VSX && VSX_REGNO_P (regno) + && (VECTOR_MEM_VSX_P (mode) + || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode) + || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode)) + || (TARGET_VSX_TIMODE && mode == TImode))) { if (FP_REGNO_P (regno)) return FP_REGNO_P (last_regno); if (ALTIVEC_REGNO_P (regno)) - return ALTIVEC_REGNO_P (last_regno); + { + if (mode == SFmode && !TARGET_UPPER_REGS_SF) + return 0; + + if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF) + return 0; + + return ALTIVEC_REGNO_P (last_regno); + } } /* The GPRs can hold any mode, but values bigger than one register @@ -1564,8 +1709,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) return INT_REGNO_P (last_regno); /* The float registers (except for VSX vector modes) can only hold floating - modes and DImode. This excludes the 32-bit decimal float mode for - now. */ + modes and DImode. 
*/ if (FP_REGNO_P (regno)) { if (SCALAR_FLOAT_MODE_P (mode) @@ -1599,9 +1743,8 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) return 1; - /* We cannot put TImode anywhere except general register and it must be able - to fit within the register set. In the future, allow TImode in the - Altivec or VSX registers. */ + /* We cannot put non-VSX TImode or PTImode anywhere except general register + and it must be able to fit within the register set. */ return GET_MODE_SIZE (mode) <= UNITS_PER_WORD; } @@ -1668,6 +1811,16 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) comma = ", "; } + if (len > 70) + { + fprintf (stderr, ",\n\t"); + comma = ""; + } + + len += fprintf (stderr, "%sreg-class = %s", comma, + reg_class_names[(int)rs6000_regno_regclass[r]]); + comma = ", "; + if (len > 70) { fprintf (stderr, ",\n\t"); @@ -1678,6 +1831,63 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) } } +static const char * +rs6000_debug_vector_unit (enum rs6000_vector v) +{ + const char *ret; + + switch (v) + { + case VECTOR_NONE: ret = "none"; break; + case VECTOR_ALTIVEC: ret = "altivec"; break; + case VECTOR_VSX: ret = "vsx"; break; + case VECTOR_P8_VECTOR: ret = "p8_vector"; break; + case VECTOR_PAIRED: ret = "paired"; break; + case VECTOR_SPE: ret = "spe"; break; + case VECTOR_OTHER: ret = "other"; break; + default: ret = "unknown"; break; + } + + return ret; +} + +/* Print the address masks in a human readble fashion. */ +DEBUG_FUNCTION void +rs6000_debug_print_mode (ssize_t m) +{ + ssize_t rc; + + fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m)); + for (rc = 0; rc < N_RELOAD_REG; rc++) + { + addr_mask_type mask = reg_addr[m].addr_mask[rc]; + fprintf (stderr, + " %s: %c%c%c%c%c%c", + reload_reg_map[rc].name, + (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ', + (mask & RELOAD_REG_MULTIPLE) != 0 ? 
'm' : ' ', + (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ', + (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ', + (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ', + (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' '); + } + + if (rs6000_vector_unit[m] != VECTOR_NONE + || rs6000_vector_mem[m] != VECTOR_NONE + || (reg_addr[m].reload_store != CODE_FOR_nothing) + || (reg_addr[m].reload_load != CODE_FOR_nothing)) + { + fprintf (stderr, + " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c", + rs6000_debug_vector_unit (rs6000_vector_unit[m]), + rs6000_debug_vector_unit (rs6000_vector_mem[m]), + (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*', + (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*'); + } + + fputs ("\n", stderr); +} + #define DEBUG_FMT_ID "%-32s= " #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n" #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: " @@ -1690,6 +1900,7 @@ rs6000_debug_reg_global (void) static const char *const tf[2] = { "false", "true" }; const char *nl = (const char *)0; int m; + size_t m1, m2, v; char costly_num[20]; char nop_num[20]; char flags_buffer[40]; @@ -1700,20 +1911,67 @@ rs6000_debug_reg_global (void) const char *cmodel_str; struct cl_target_option cl_opts; - /* Map enum rs6000_vector to string. */ - static const char *rs6000_debug_vector_unit[] = { - "none", - "altivec", - "vsx", - "paired", - "spe", - "other" + /* Modes we want tieable information on. */ + static const enum machine_mode print_tieable_modes[] = { + QImode, + HImode, + SImode, + DImode, + TImode, + PTImode, + SFmode, + DFmode, + TFmode, + SDmode, + DDmode, + TDmode, + V8QImode, + V4HImode, + V2SImode, + V16QImode, + V8HImode, + V4SImode, + V2DImode, + V32QImode, + V16HImode, + V8SImode, + V4DImode, + V2SFmode, + V4SFmode, + V2DFmode, + V8SFmode, + V4DFmode, + CCmode, + CCUNSmode, + CCEQmode, + }; + + /* Virtual regs we are interested in. */ + const static struct { + int regno; /* register number. */ + const char *name; /* register name. 
*/ + } virtual_regs[] = { + { STACK_POINTER_REGNUM, "stack pointer:" }, + { TOC_REGNUM, "toc: " }, + { STATIC_CHAIN_REGNUM, "static chain: " }, + { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " }, + { HARD_FRAME_POINTER_REGNUM, "hard frame: " }, + { ARG_POINTER_REGNUM, "arg pointer: " }, + { FRAME_POINTER_REGNUM, "frame pointer:" }, + { FIRST_PSEUDO_REGISTER, "first pseudo: " }, + { FIRST_VIRTUAL_REGISTER, "first virtual:" }, + { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" }, + { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " }, + { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" }, + { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" }, + { VIRTUAL_CFA_REGNUM, "cfa (frame): " }, + { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" }, + { LAST_VIRTUAL_REGISTER, "last virtual: " }, }; - fprintf (stderr, "Register information: (last virtual reg = %d)\n", - LAST_VIRTUAL_REGISTER); - rs6000_debug_reg_print (0, 31, "gr"); - rs6000_debug_reg_print (32, 63, "fp"); + fputs ("\nHard register information:\n", stderr); + rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr"); + rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp"); rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO, "vs"); @@ -1726,6 +1984,10 @@ rs6000_debug_reg_global (void) rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a"); rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f"); + fputs ("\nVirtual/stack/frame registers:\n", stderr); + for (v = 0; v < ARRAY_SIZE (virtual_regs); v++) + fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno); + fprintf (stderr, "\n" "d reg_class = %s\n" @@ -1734,24 +1996,69 @@ rs6000_debug_reg_global (void) "wa reg_class = %s\n" "wd reg_class = %s\n" "wf reg_class = %s\n" - "ws reg_class = %s\n\n", + "wg reg_class = %s\n" + "wl reg_class = %s\n" + "wm reg_class = %s\n" + "wr reg_class = %s\n" + "ws reg_class = %s\n" + "wt reg_class = %s\n" + "wu reg_class = %s\n" + "wv reg_class = %s\n" + "ww 
reg_class = %s\n" + "wx reg_class = %s\n" + "wy reg_class = %s\n" + "wz reg_class = %s\n" + "\n", reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]], - reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]]); - + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]); + + nl = "\n"; for (m = 0; m < NUM_MACHINE_MODES; ++m) - if (rs6000_vector_unit[m] || rs6000_vector_mem[m]) - { - nl = "\n"; - fprintf (stderr, "Vector mode: %-5s arithmetic: %-8s move: %-8s\n", - GET_MODE_NAME (m), - rs6000_debug_vector_unit[ rs6000_vector_unit[m] ], - rs6000_debug_vector_unit[ rs6000_vector_mem[m] ]); - } + rs6000_debug_print_mode (m); + + fputs ("\n", stderr); + + for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++) + { + enum machine_mode mode1 = print_tieable_modes[m1]; + bool first_time = true; + + nl = (const char *)0; + for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++) + { + enum machine_mode mode2 = print_tieable_modes[m2]; + if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2)) + { + if (first_time) + { 
+ fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1)); + nl = "\n"; + first_time = false; + } + + fprintf (stderr, " %s", GET_MODE_NAME (mode2)); + } + } + + if (!first_time) + fputs ("\n", stderr); + } if (nl) fputs (nl, stderr); @@ -1935,6 +2242,13 @@ rs6000_debug_reg_global (void) if (TARGET_LINK_STACK) fprintf (stderr, DEBUG_FMT_S, "link_stack", "true"); + if (targetm.lra_p ()) + fprintf (stderr, DEBUG_FMT_S, "lra", "true"); + + if (TARGET_P8_FUSION) + fprintf (stderr, DEBUG_FMT_S, "p8 fusion", + (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero"); + fprintf (stderr, DEBUG_FMT_S, "plt-format", TARGET_SECURE_PLT ? "secure" : "bss"); fprintf (stderr, DEBUG_FMT_S, "struct-return", @@ -1954,11 +2268,106 @@ rs6000_debug_reg_global (void) (int)RS6000_BUILTIN_COUNT); } + +/* Update the addr mask bits in reg_addr to help secondary reload and go if + legitimate address support to figure out the appropriate addressing to + use. */ + +static void +rs6000_setup_reg_addr_masks (void) +{ + ssize_t rc, reg, m, nregs; + addr_mask_type any_addr_mask, addr_mask; + + for (m = 0; m < NUM_MACHINE_MODES; ++m) + { + /* SDmode is special in that we want to access it only via REG+REG + addressing on power7 and above, since we want to use the LFIWZX and + STFIWZX instructions to load it. */ + bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); + + any_addr_mask = 0; + for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) + { + addr_mask = 0; + reg = reload_reg_map[rc].reg; + + /* Can mode values go in the GPR/FPR/Altivec registers? */ + if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg]) + { + nregs = rs6000_hard_regno_nregs[m][reg]; + addr_mask |= RELOAD_REG_VALID; + + /* Indicate if the mode takes more than 1 physical register. If + it takes a single register, indicate it can do REG+REG + addressing. 
*/ + if (nregs > 1 || m == BLKmode) + addr_mask |= RELOAD_REG_MULTIPLE; + else + addr_mask |= RELOAD_REG_INDEXED; + + /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY + addressing. Restrict addressing on SPE for 64-bit types + because of the SUBREG hackery used to address 64-bit floats in + '32-bit' GPRs. To simplify secondary reload, don't allow + update forms on scalar floating point types that can go in the + upper registers. */ + + if (TARGET_UPDATE + && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR) + && GET_MODE_SIZE (m) <= 8 + && !VECTOR_MODE_P (m) + && !COMPLEX_MODE_P (m) + && !indexed_only_p + && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m) == 8) + && !(m == DFmode && TARGET_UPPER_REGS_DF) + && !(m == SFmode && TARGET_UPPER_REGS_SF)) + { + addr_mask |= RELOAD_REG_PRE_INCDEC; + + /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that + we don't allow PRE_MODIFY for some multi-register + operations. */ + switch (m) + { + default: + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + + case DImode: + if (TARGET_POWERPC64) + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + + case DFmode: + case DDmode: + if (TARGET_DF_INSN) + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + } + } + } + + /* GPR and FPR registers can do REG+OFFSET addressing, except + possibly for SDmode. */ + if ((addr_mask != 0) && !indexed_only_p + && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)) + addr_mask |= RELOAD_REG_OFFSET; + + reg_addr[m].addr_mask[rc] = addr_mask; + any_addr_mask |= addr_mask; + } + + reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask; + } +} + + /* Initialize the various global tables that are based on register size. 
*/ static void rs6000_init_hard_regno_mode_ok (bool global_init_p) { - int r, m, c; + ssize_t r, m, c; int align64; int align32; @@ -1990,17 +2399,48 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; - /* Precalculate vector information, this must be set up before the - rs6000_hard_regno_nregs_internal below. */ - for (m = 0; m < NUM_MACHINE_MODES; ++m) + /* Precalculate register class to simpler reload register class. We don't + need all of the register classes that are combinations of different + classes, just the simple ones that have constraint letters. */ + for (c = 0; c < N_REG_CLASSES; c++) + reg_class_to_reg_type[c] = NO_REG_TYPE; + + reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE; + reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE; + + if (TARGET_VSX) + { + reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; + } + else { - rs6000_vector_unit[m] = rs6000_vector_mem[m] = VECTOR_NONE; - rs6000_vector_reload[m][0] = CODE_FOR_nothing; - rs6000_vector_reload[m][1] = CODE_FOR_nothing; + reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; } - for (c = 0; c < (int)(int)RS6000_CONSTRAINT_MAX; c++) - rs6000_constraints[c] = NO_REGS; + /* Precalculate 
the valid memory formats as well as the vector information, + this must be set up before the rs6000_hard_regno_nregs_internal calls + below. */ + gcc_assert ((int)VECTOR_NONE == 0); + memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit)); + memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit)); + + gcc_assert ((int)CODE_FOR_nothing == 0); + memset ((void *) ®_addr[0], '\0', sizeof (reg_addr)); + + gcc_assert ((int)NO_REGS == 0); + memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints)); /* The VSX hardware allows native alignment for vectors, but control whether the compiler believes it can use native alignment or still uses 128-bit alignment. */ @@ -2062,12 +2502,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) } } - /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract. - Altivec doesn't have 64-bit support. */ + /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to + do insert/splat/extract. Altivec doesn't have 64-bit integer support. */ if (TARGET_VSX) { rs6000_vector_mem[V2DImode] = VECTOR_VSX; - rs6000_vector_unit[V2DImode] = VECTOR_NONE; + rs6000_vector_unit[V2DImode] + = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; rs6000_vector_align[V2DImode] = align64; } @@ -2076,14 +2517,48 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) { rs6000_vector_unit[DFmode] = VECTOR_VSX; rs6000_vector_mem[DFmode] - = (TARGET_VSX_SCALAR_MEMORY ? VECTOR_VSX : VECTOR_NONE); + = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE); rs6000_vector_align[DFmode] = align64; } + /* Allow TImode in VSX register and set the VSX memory macros. */ + if (TARGET_VSX && TARGET_VSX_TIMODE) + { + rs6000_vector_mem[TImode] = VECTOR_VSX; + rs6000_vector_align[TImode] = align64; + } + /* TODO add SPE and paired floating point vector support. */ /* Register class constraints for the constraints that depend on compile - switches. */ + switches. 
When the VSX code was added, different constraints were added + based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all + of the VSX registers are used. The register classes for scalar floating + point types is set, based on whether we allow that type into the upper + (Altivec) registers. GCC has register classes to target the Altivec + registers for load/store operations, to select using a VSX memory + operation instead of the traditional floating point operation. The + constraints are: + + d - Register class to use with traditional DFmode instructions. + f - Register class to use with traditional SFmode instructions. + v - Altivec register. + wa - Any VSX register. + wd - Preferred register class for V2DFmode. + wf - Preferred register class for V4SFmode. + wg - Float register for power6x move insns. + wl - Float register if we can do 32-bit signed int loads. + wm - VSX register for ISA 2.07 direct move operations. + wr - GPR if 64-bit mode is permitted. + ws - Register class to do ISA 2.06 DF operations. + wu - Altivec register for ISA 2.07 VSX SF/SI load/stores. + wv - Altivec register for ISA 2.06 VSX DF/DI load/stores. + wt - VSX register for TImode in VSX registers. + ww - Register class to do SF conversions in with VSX operations. + wx - Float register if we can do 32-bit int stores. + wy - Register class to do ISA 2.07 SF operations. + wz - Float register if we can do 32-bit unsigned int loads. */ + if (TARGET_HARD_FLOAT && TARGET_FPRS) rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; @@ -2092,63 +2567,157 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_VSX) { - /* At present, we just use VSX_REGS, but we have different constraints - based on the use, in case we want to fine tune the default register - class used. wa = any VSX register, wf = register class to use for - V4SF, wd = register class to use for V2DF, and ws = register classs to - use for DF scalars. 
*/ rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; - rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; - rs6000_constraints[RS6000_CONSTRAINT_ws] = (TARGET_VSX_SCALAR_MEMORY - ? VSX_REGS - : FLOAT_REGS); + rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; + + if (TARGET_VSX_TIMODE) + rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; + + if (TARGET_UPPER_REGS_DF) + { + rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; + } + else + rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS; } + /* Add conditional constraints based on various options, to allow us to + collapse multiple insn patterns. */ if (TARGET_ALTIVEC) rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS; - /* Set up the reload helper functions. */ + if (TARGET_MFPGPR) + rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS; + + if (TARGET_LFIWAX) + rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; + + if (TARGET_DIRECT_MOVE) + rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS; + + if (TARGET_POWERPC64) + rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; + + if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) + { + rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS; + } + else if (TARGET_P8_VECTOR) + { + rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS; + rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; + } + else if (TARGET_VSX) + rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; + + if (TARGET_STFIWX) + rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; + + if (TARGET_LFIWZX) + rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; + + /* Set up the reload helper and direct move functions. 
*/ if (TARGET_VSX || TARGET_ALTIVEC) { if (TARGET_64BIT) { - rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_di_store; - rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_di_load; - rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_di_store; - rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_di_load; - rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_di_store; - rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_di_load; - rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_di_store; - rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_di_load; - rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_di_store; - rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_di_load; - rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_di_store; - rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_di_load; - if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY) + reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store; + reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; + reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; + reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; + reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; + reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; + reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; + reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load; + reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store; + reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load; + reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store; + reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load; + if (TARGET_VSX && TARGET_UPPER_REGS_DF) + { + reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store; + reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load; + reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store; + reg_addr[DDmode].reload_load = 
CODE_FOR_reload_dd_di_load; + } + if (TARGET_P8_VECTOR) { - rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_di_store; - rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_di_load; + reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store; + reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load; + reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store; + reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load; + } + if (TARGET_VSX_TIMODE) + { + reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store; + reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load; + } + if (TARGET_DIRECT_MOVE) + { + if (TARGET_POWERPC64) + { + reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; + reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; + reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; + reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; + reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; + reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; + reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; + reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; + + reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; + reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; + reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; + reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; + reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; + reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; + reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; + reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; + } + else + { + reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi; + reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd; + reg_addr[DFmode].reload_fpr_gpr = 
CODE_FOR_reload_fpr_from_gprdf; + } } } else { - rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_si_store; - rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_si_load; - rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_si_store; - rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_si_load; - rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_si_store; - rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_si_load; - rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_si_store; - rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_si_load; - rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_si_store; - rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_si_load; - rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_si_store; - rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_si_load; - if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY) + reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store; + reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load; + reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store; + reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load; + reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store; + reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load; + reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store; + reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; + reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; + reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; + reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; + reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load; + if (TARGET_VSX && TARGET_UPPER_REGS_DF) + { + reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store; + reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load; + reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store; + reg_addr[DDmode].reload_load = 
CODE_FOR_reload_dd_si_load; + } + if (TARGET_P8_VECTOR) + { + reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store; + reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load; + reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store; + reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load; + } + if (TARGET_VSX_TIMODE) { - rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_si_store; - rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_si_load; + reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store; + reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load; } } } @@ -2267,6 +2836,11 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) } } + /* Update the addr mask bits in reg_addr to help secondary reload and go if + legitimate address support to figure out the appropriate addressing to + use. */ + rs6000_setup_reg_addr_masks (); + if (global_init_p || TARGET_DEBUG_TARGET) { if (TARGET_DEBUG_REG) @@ -2369,16 +2943,18 @@ darwin_rs6000_override_options (void) HOST_WIDE_INT rs6000_builtin_mask_calculate (void) { - return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) - | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) - | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) - | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) - | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) - | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) - | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) - | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) - | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) - | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)); + return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) + | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) + | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) + | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) + | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) + | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) + | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) + | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) + | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) + | ((rs6000_cpu == PROCESSOR_CELL) ? 
RS6000_BTM_CELL : 0) + | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) + | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)); } /* Override command line options. Mostly we process the processor type and @@ -2619,7 +3195,10 @@ rs6000_option_override_internal (bool global_init_p) if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) msg = N_("-mvsx requires hardware floating point"); else - rs6000_isa_flags &= ~ OPTION_MASK_VSX; + { + rs6000_isa_flags &= ~ OPTION_MASK_VSX; + rs6000_isa_flags_explicit |= OPTION_MASK_VSX; + } } else if (TARGET_PAIRED_FLOAT) msg = N_("-mvsx and -mpaired are incompatible"); @@ -2647,9 +3226,24 @@ rs6000_option_override_internal (bool global_init_p) } } + /* If hard-float/altivec/vsx were explicitly turned off then don't allow + the -mcpu setting to enable options that conflict. */ + if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX) + && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT + | OPTION_MASK_ALTIVEC + | OPTION_MASK_VSX)) != 0) + rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO + | OPTION_MASK_DIRECT_MOVE) + & ~rs6000_isa_flags_explicit); + + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) + rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags); + /* For the newer switches (vsx, dfp, etc.) set some of the older options, unless the user explicitly used the -mno-