From 6fb122754c7650e4b766619e8fe8a63ba777c41a Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 4 Apr 2014 13:39:27 +0000 Subject: [PATCH] Backport PRs 57615, 57744, 58160, 58587, 58673, 59054 [gcc] 2014-04-04 Bill Schmidt Apply mainline 2013-11-23 Alan Modra * config/rs6000/vsx.md (fusion peepholes): Disable when !TARGET_VSX. Backport from mainline 2013-11-12 Michael Meissner PR target/59054 * config/rs6000/rs6000.md (movdi_internal32): Eliminate constraints that would allow DImode into the traditional Altivec registers, but cause undesirable code generation when loading 0 as a constant. (movdi_internal64): Likewise. (cmp_fpr): Do not use %x for CR register output. (extendsfdf2_fpr): Fix constraints when -mallow-upper-df and -mallow-upper-sf debug switches are used. Backport from mainline 2013-10-17 Michael Meissner * config/rs6000/rs6000.c (enum rs6000_reload_reg_type): Add new fields to the reg_addr array that describes the valid addressing mode for any register, general purpose registers, floating point registers, and Altivec registers. (FIRST_RELOAD_REG_CLASS): Likewise. (LAST_RELOAD_REG_CLASS): Likewise. (struct reload_reg_map_type): Likewise. (reload_reg_map_type): Likewise. (RELOAD_REG_VALID): Likewise. (RELOAD_REG_MULTIPLE): Likewise. (RELOAD_REG_INDEXED): Likewise. (RELOAD_REG_OFFSET): Likewise. (RELOAD_REG_PRE_INCDEC): Likewise. (RELOAD_REG_PRE_MODIFY): Likewise. (reg_addr): Likewise. (mode_supports_pre_incdec_p): New helper functions to say whether a given mode supports PRE_INC, PRE_DEC, and PRE_MODIFY. (mode_supports_pre_modify_p): Likewise. (rs6000_debug_vector_unit): Rearrange the -mdebug=reg output to print the valid address mode bits for each mode. (rs6000_debug_print_mode): Likewise. (rs6000_debug_reg_global): Likewise. (rs6000_setup_reg_addr_masks): New function to set up the address mask bits for each type. (rs6000_init_hard_regno_mode_ok): Use memset to clear arrays. Call rs6000_setup_reg_addr_masks to set up the address mask bits. 
(rs6000_legitimate_address_p): Use mode_supports_pre_incdec_p and mode_supports_pre_modify_p to determine if PRE_INC, PRE_DEC, and PRE_MODIFY are supported. (rs6000_output_move_128bit): Change to use {src,dest}_vmx_p for altivec registers, instead of {src,dest}_av_p. (rs6000_print_options_internal): Tweak the debug output slightly. Backport from mainline 2013-10-03 Michael Meissner * config/rs6000/rs6000-builtin.def (XSRDPIM): Use floatdf2, ceildf2, btruncdf2, instead of vsx_* name. * config/rs6000/vsx.md (vsx_add3): Change arithmetic iterators to only do V2DF and V4SF here. Move the DF code to rs6000.md where it is combined with SF mode. Replace with just 'v' since only vector operations are handled with these insns after moving the DF support to rs6000.md. (vsx_sub3): Likewise. (vsx_mul3): Likewise. (vsx_div3): Likewise. (vsx_fre2): Likewise. (vsx_neg2): Likewise. (vsx_abs2): Likewise. (vsx_nabs2): Likewise. (vsx_smax3): Likewise. (vsx_smin3): Likewise. (vsx_sqrt2): Likewise. (vsx_rsqrte2): Likewise. (vsx_fms4): Likewise. (vsx_nfma4): Likewise. (vsx_copysign3): Likewise. (vsx_btrunc2): Likewise. (vsx_floor2): Likewise. (vsx_ceil2): Likewise. (vsx_smaxsf3): Delete scalar ops that were moved to rs6000.md. (vsx_sminsf3): Likewise. (vsx_fmadf4): Likewise. (vsx_fmsdf4): Likewise. (vsx_nfmadf4): Likewise. (vsx_nfmsdf4): Likewise. (vsx_cmpdf_internal1): Likewise. * config/rs6000/rs6000.h (TARGET_SF_SPE): Define macros to make it simpler to select whether a target has SPE or traditional floating point support in iterators. (TARGET_DF_SPE): Likewise. (TARGET_SF_FPR): Likewise. (TARGET_DF_FPR): Likewise. (TARGET_SF_INSN): Macros to say whether floating point support exists for a given operation for expanders. (TARGET_DF_INSN): Likewise. * config/rs6000/rs6000.c (Ftrad): New mode attributes to allow combining of SF/DF mode operations, using both traditional and VSX registers. (Fvsx): Likewise. (Ff): Likewise. (Fv): Likewise. (Fs): Likewise. (Ffre): Likewise. 
(FFRE): Likewise. (abs2): Combine SF/DF modes using traditional floating point instructions. Add support for using the upper DF registers with VSX support, and SF registers with power8-vector support. Update expanders for operations supported by both the SPE and traditional floating point units. (abs2_fpr): Likewise. (nabs2): Likewise. (nabs2_fpr): Likewise. (neg2): Likewise. (neg2_fpr): Likewise. (add3): Likewise. (add3_fpr): Likewise. (sub3): Likewise. (sub3_fpr): Likewise. (mul3): Likewise. (mul3_fpr): Likewise. (div3): Likewise. (div3_fpr): Likewise. (sqrt3): Likewise. (sqrt3_fpr): Likewise. (fre): Likewise. (rsqrt2): Likewise. (cmp_fpr): Likewise. (smax3): Likewise. (smin3): Likewise. (smax3_vsx): Likewise. (smin3_vsx): Likewise. (negsf2): Delete SF operations that are merged with DF. (abssf2): Likewise. (addsf3): Likewise. (subsf3): Likewise. (mulsf3): Likewise. (divsf3): Likewise. (fres): Likewise. (fmasf4_fpr): Likewise. (fmssf4_fpr): Likewise. (nfmasf4_fpr): Likewise. (nfmssf4_fpr): Likewise. (sqrtsf2): Likewise. (rsqrtsf_internal1): Likewise. (smaxsf3): Likewise. (sminsf3): Likewise. (cmpsf_internal1): Likewise. (copysign3_fcpsgn): Add VSX/power8-vector support. (negdf2): Delete DF operations that are merged with SF. (absdf2): Likewise. (nabsdf2): Likewise. (adddf3): Likewise. (subdf3): Likewise. (muldf3): Likewise. (divdf3): Likewise. (fred): Likewise. (rsqrtdf_internal1): Likewise. (fmadf4_fpr): Likewise. (fmsdf4_fpr): Likewise. (nfmadf4_fpr): Likewise. (nfmsdf4_fpr): Likewise. (sqrtdf2): Likewise. (smaxdf3): Likewise. (smindf3): Likewise. (cmpdf_internal1): Likewise. (lrintdi2): Use TARGET__FPR macro. (btrunc2): Delete separate expander, and combine with the insn and add VSX instruction support. Use TARGET__FPR. (btrunc2_fpr): Likewise. (ceil2): Likewise. (ceil2_fpr): Likewise. (floor2): Likewise. (floor2_fpr): Likewise. (fma4_fpr): Combine SF and DF fused multiply/add support. Add support for using the upper registers with VSX and power8-vector. 
Move insns to be closer to the define_expands. On VSX systems, prefer the traditional form of FMA over the VSX version, since the traditional form allows the target not to overlap with the inputs. (fms4_fpr): Likewise. (nfma4_fpr): Likewise. (nfms4_fpr): Likewise. Backport from mainline 2013-09-27 Michael Meissner * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Allow DFmode, DImode, and SFmode in the upper VSX registers based on the -mupper-regs-{df,sf} flags. Fix wu constraint to be ALTIVEC_REGS if -mpower8-vector. Combine -mvsx-timode handling with the rest of the VSX register handling. * config/rs6000/rs6000.md (f32_lv): Use %x0 for VSX regsters. (f32_sv): Likewise. (zero_extendsidi2_lfiwzx): Add support for loading into the Altivec registers with -mpower8-vector. Use wu/wv constraints to only do VSX memory options on Altivec registers. (extendsidi2_lfiwax): Likewise. (extendsfdf2_fpr): Likewise. (mov_hardfloat, SF/SD modes): Likewise. (mov_hardfloat32, DF/DD modes): Likewise. (mov_hardfloat64, DF/DD modes): Likewise. (movdi_internal64): Likewise. Backport from mainline 2013-09-23 Michael Meissner * config/rs6000/rs6000.c (rs6000_vector_reload): Delete, combine reload helper function arrays into a single array reg_addr. (reload_fpr_gpr): Likewise. (reload_gpr_vsx): Likewise. (reload_vsx_gpr): Likewise. (struct rs6000_reg_addr): Likewise. (reg_addr): Likewise. (rs6000_debug_reg_global): Change rs6000_vector_reload, reload_fpr_gpr, reload_gpr_vsx, reload_vsx_gpr uses to reg_addr. (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_secondary_reload_direct_move): Likewise. (rs6000_secondary_reload): Likewise. * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add new constraints: wu, ww, and wy. Repurpose wv constraint added during power8 changes. Put wg constraint in alphabetical order. * config/rs6000/rs6000.opt (-mvsx-scalar-float): New debug switch for future work to add ISA 2.07 VSX single precision support. 
(-mvsx-scalar-double): Change default from -1 to 1, update documentation comment. (-mvsx-scalar-memory): Rename debug switch to -mupper-regs-df. (-mupper-regs-df): New debug switch to control whether DF values can go in the traditional Altivec registers. (-mupper-regs-sf): New debug switch to control whether SF values can go in the traditional Altivec registers. * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print wu, ww, and wy constraints. (rs6000_init_hard_regno_mode_ok): Use ssize_t instead of int for loop variables. Rename -mvsx-scalar-memory to -mupper-regs-df. Add new constraints, wu/ww/wy. Repurpose wv constraint. (rs6000_debug_legitimate_address_p): Print if we are running before, during, or after reload. (rs6000_secondary_reload): Add a comment. (rs6000_opt_masks): Add -mupper-regs-df, -mupper-regs-sf. * config/rs6000/constraints.md (wa constraint): Sort w constraints. Update documentation string. (wd constraint): Likewise. (wf constraint): Likewise. (wg constraint): Likewise. (wn constraint): Likewise. (ws constraint): Likewise. (wt constraint): Likewise. (wx constraint): Likewise. (wz constraint): Likewise. (wu constraint): New constraint for ISA 2.07 SFmode scalar instructions. (ww constraint): Likewise. (wy constraint): Likewise. (wv constraint): Repurpose ISA 2.07 constraint that was not used in the previous submissions. * doc/md.texi (PowerPC and IBM RS6000): Likewise. Backport from mainline 2013-10-17 Michael Meissner PR target/58673 * config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only restrict TImode addresses to single indirect registers if both -mquad-memory and -mvsx-timode are used. (rs6000_output_move_128bit): Use quad_load_store_p to determine if we should emit load/store quad. Remove using %y for quad memory addresses. * config/rs6000/rs6000.md (mov_ppc64, TI/PTImode): Add constraints to allow load/store quad on machines where TImode is not allowed in VSX registers. 
Use 'n' instead of 'F' constraint for TImode to load integer constants. Backport from mainline 2013-10-02 Michael Meissner PR target/58587 * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Turn off setting -mvsx-timode by default until the underlying problem is fixed. (RS6000_CPU, power7 defaults): Likewise. Backport from trunk 2013-08-16 Michael Meissner PR target/58160 * config/rs6000/predicates.md (fusion_gpr_mem_load): Allow the memory rtx to contain ZERO_EXTEND and SIGN_EXTEND. * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Pass operands array instead of each individual operand as a separate argument. (emit_fusion_gpr_load): Likewise. (expand_fusion_gpr_load): Add new function declaration. * config/rs6000/rs6000.c (fusion_gpr_load_p): Change the calling signature to have the operands passed as an array, instead of as separate arguments. Allow ZERO_EXTEND to be in the memory address, and also SIGN_EXTEND if -mpower8-fusion-sign. Do not depend on the register live/dead flags when peepholes are run. (expand_fusion_gpr_load): New function to be called from the peephole2 pass, to change the register that addis sets to be the target register. (emit_fusion_gpr_load): Change the calling signature to have the operands passed as an array, instead of as separate arguments. Allow ZERO_EXTEND to be in the memory address, and also SIGN_EXTEND if -mpower8-fusion-sign. * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): Delete unused unspec enumeration. (power8 fusion peephole/peephole2): Rework the fusion peepholes to adjust the register addis loads up in the peephole2 pass. Do not depend on the register live/dead state when the peephole pass is done. Backport from trunk 2013-07-23 Michael Meissner * config/rs6000/vector.md (xor3): Move 128-bit boolean expanders to rs6000.md. (ior3): Likewise. (and3): Likewise. (one_cmpl2): Likewise. (nor3): Likewise. (andc3): Likewise. (eqv3): Likewise. (nand3): Likewise. (orc3): Likewise. 
* config/rs6000/rs6000-protos.h (rs6000_split_logical): New declaration. * config/rs6000/rs6000.c (rs6000_split_logical_inner): Add support to split multi-word logical operations. (rs6000_split_logical_di): Likewise. (rs6000_split_logical): Likewise. * config/rs6000/vsx.md (VSX_L2): Delete, no longer used. (vsx_and3_32bit): Move 128-bit logical insns to rs6000.md, and allow TImode operations in 32-bit. (vsx_and3_64bit): Likewise. (vsx_ior3_32bit): Likewise. (vsx_ior3_64bit): Likewise. (vsx_xor3_32bit): Likewise. (vsx_xor3_64bit): Likewise. (vsx_one_cmpl2_32bit): Likewise. (vsx_one_cmpl2_64bit): Likewise. (vsx_nor3_32bit): Likewise. (vsx_nor3_64bit): Likewise. (vsx_andc3_32bit): Likewise. (vsx_andc3_64bit): Likewise. (vsx_eqv3_32bit): Likewise. (vsx_eqv3_64bit): Likewise. (vsx_nand3_32bit): Likewise. (vsx_nand3_64bit): Likewise. (vsx_orc3_32bit): Likewise. (vsx_orc3_64bit): Likewise. * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Always allow vector logical types in GPRs. * config/rs6000/altivec.md (altivec_and3): Move 128-bit logical insns to rs6000.md, and allow TImode operations in 32-bit. (altivec_ior3): Likewise. (altivec_xor3): Likewise. (altivec_one_cmpl2): Likewise. (altivec_nor3): Likewise. (altivec_andc3): Likewise. * config/rs6000/rs6000.md (BOOL_128): New mode iterators and mode attributes for moving the 128-bit logical operations into rs6000.md. (BOOL_REGS_OUTPUT): Likewise. (BOOL_REGS_OP1): Likewise. (BOOL_REGS_OP2): Likewise. (BOOL_REGS_UNARY): Likewise. (BOOL_REGS_AND_CR0): Likewise. (one_cmpl2): Add support for DI logical operations on 32-bit, splitting the operations to 32-bit. (anddi3): Likewise. (iordi3): Likewise. (xordi3): Likewise. (and3, 128-bit types): Rewrite 2013-06-06 logical operator changes to combine the 32/64-bit code, allow logical operations on TI mode in 32-bit, and to use similar match_operator patterns like scalar mode uses. Combine the Altivec and VSX code for logical operations, and move it here. (ior3, 128-bit types): Likewise. 
(xor3, 128-bit types): Likewise. (one_cmpl3, 128-bit types): Likewise. (nor3, 128-bit types): Likewise. (andc3, 128-bit types): Likewise. (eqv3, 128-bit types): Likewise. (nand3, 128-bit types): Likewise. (orc3, 128-bit types): Likewise. (and3_internal): Likewise. (bool3_internal): Likewise. (boolc3_internal1): Likewise. (boolc3_internal2): Likewise. (boolcc3_internal1): Likewise. (boolcc3_internal2): Likewise. (eqv3_internal1): Likewise. (eqv3_internal2): Likewise. (one_cmpl13_internal): Likewise. Back port from mainline: 2013-06-06 Michael Meissner Pat Haugen Peter Bergner * lib/target-supports.exp (check_p8vector_hw_available) Add power8 support. (check_effective_target_powerpc_p8vector_ok): Likewise. (is-effective-target): Likewise. (check_vect_support_and_set_flags): Likewise. Backport from mainline 2013-07-31 Michael Meissner * config/rs6000/predicates.md (fusion_gpr_addis): New predicates to support power8 load fusion. (fusion_gpr_mem_load): Likewise. * config/rs6000/rs6000-modes.def (PTImode): Update a comment. * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New declarations for power8 load fusion. (emit_fusion_gpr_load): Likewise. * config/rs6000/rs6000.c (rs6000_option_override_internal): If tuning for power8, turn on fusion mode by default. Turn on sign extending fusion mode if normal fusion mode is on, and we are at -O2 or -O3. (fusion_gpr_load_p): New function, return true if we can fuse an addis instruction with a dependent load to a GPR. (emit_fusion_gpr_load): Emit the instructions for power8 load fusion to GPRs. * config/rs6000/vsx.md (VSX_M2): New iterator for fusion peepholes. (VSX load fusion peepholes): New peepholes to fuse together an addi instruction with a VSX load instruction. * config/rs6000/rs6000.md (GPR load fusion peepholes): New peepholes to fuse an addis instruction with a load to a GPR base register. 
If we are supporting sign extending fusions, convert sign extending loads to zero extending loads and add an explicit sign extension. Backport from mainline 2013-07-18 Pat Haugen * config/rs6000/rs6000.c (rs6000_option_override_internal): Adjust flag interaction for new Power8 flags and VSX. Back port from the trunk 2013-06-28 Michael Meissner PR target/57744 * config/rs6000/rs6000.h (MODES_TIEABLE_P): Do not allow PTImode to tie with any other modes. Eliminate Altivec vector mode tests, since these are a subset of ALTIVEC or VSX vector modes. Simplify code, to return 0 if testing MODE2 for a condition, if we've already tested MODE1 for the same condition. Backport from mainline 2013-06-28 Pat Haugen * config/rs6000/rs6000.md (define_insn ""): Fix insn type. Back port from the trunk 2013-06-26 Michael Meissner Pat Haugen Peter Bergner * config/rs6000/power8.md: New. * config/rs6000/rs6000-cpus.def (RS6000_CPU table): Adjust processor setting for power8 entry. * config/rs6000/t-rs6000 (MD_INCLUDES): Add power8.md. * config/rs6000/rs6000.c (is_microcoded_insn, is_cracked_insn): Adjust test for Power4/Power5 only. (insn_must_be_first_in_group, insn_must_be_last_in_group): Add Power8 support. (force_new_group): Adjust comment. * config/rs6000/rs6000.md: Include power8.md. Back port from the trunk 2013-06-14 Michael Meissner PR target/57615 * config/rs6000/rs6000.md (mov_ppc64): Call rs6000_output_move_128bit to handle emitting quad memory operations. Set attribute length to 8 bytes. Back port from the trunk 2013-06-13 Michael Meissner * config/rs6000/rs6000.c (rs6000_option_override_internal): Move test for clearing quad memory on 32-bit later. Back port from the trunk 2013-06-12 Michael Meissner Pat Haugen Peter Bergner * config/rs6000/rs6000.c (emit_load_locked): Add support for power8 byte, half-word, and quad-word atomic instructions. (emit_store_conditional): Likewise. (rs6000_expand_atomic_compare_and_swap): Likewise. (rs6000_expand_atomic_op): Likewise. 
* config/rs6000/sync.md (larx): Add new modes for power8. (stcx): Likewise. (AINT): New mode iterator to include TImode as well as normal integer modes on power8. (fetchop_pred): Use int_reg_operand instead of gpc_reg_operand so that VSX registers are not considered. Use AINT mode iterator instead of INT1 to allow inclusion of quad word atomic operations on power8. (load_locked): Likewise. (store_conditional): Likewise. (atomic_compare_and_swap): Likewise. (atomic_exchange): Likewise. (atomic_nand): Likewise. (atomic_fetch_): Likewise. (atomic_nand_fetch): Likewise. (mem_thread_fence): Use gen_loadsync_ instead of enumerating each type. (ATOMIC): On power8, add QImode, HImode modes. (load_locked_si): Variants of load_locked for QI/HI modes that promote to SImode. (load_lockedti): Convert TImode arguments to PTImode, so that we get a guaranteed even/odd register pair. (load_lockedpti): Likewise. (store_conditionalti): Likewise. (store_conditionalpti): Likewise. * config/rs6000/rs6000.md (QHI): New mode iterator for power8 atomic load/store instructions. (HSI): Likewise. Back port from the trunk 2013-06-10 Michael Meissner Pat Haugen Peter Bergner * config/rs6000/vector.md (GPR move splitter): Do not split moves of vectors in GPRs if they are direct moves or quad word load or store moves. * config/rs6000/rs6000-protos.h (rs6000_output_move_128bit): Add declaration. (direct_move_p): Likewise. (quad_load_store_p): Likewise. * config/rs6000/rs6000.c (enum rs6000_reg_type): Simplify register classes into bins based on the physical register type. (reg_class_to_reg_type): Likewise. (IS_STD_REG_TYPE): Likewise. (IS_FP_VECT_REG_TYPE): Likewise. (reload_fpr_gpr): Arrays to determine what insn to use if we can use direct move instructions. (reload_gpr_vsx): Likewise. (reload_vsx_gpr): Likewise. (rs6000_init_hard_regno_mode_ok): Precalculate the register type information that is a simplification of register classes. Also precalculate direct move reload helpers. 
(direct_move_p): New function to return true if the operation can be done as a direct move instruction. (quad_load_store_p): New function to return true if the operation is a quad memory operation. (rs6000_legitimize_address): If quad memory, only allow register indirect for TImode addresses. (rs6000_legitimate_address_p): Likewise. (enum reload_reg_type): Delete, replace with rs6000_reg_type. (rs6000_reload_register_type): Likewise. (register_to_reg_type): Return register type. (rs6000_secondary_reload_simple_move): New helper function for secondary reload and secondary memory needed to identify anything that is a simple move, and does not need reloading. (rs6000_secondary_reload_direct_move): New helper function for secondary reload to identify cases that can be done with several instructions via the direct move instructions. (rs6000_secondary_reload_move): New helper function for secondary reload to identify moves between register types that can be done. (rs6000_secondary_reload): Add support for quad memory operations and for direct move. (rs6000_secondary_memory_needed): Likewise. (rs6000_debug_secondary_memory_needed): Change argument names. (rs6000_output_move_128bit): New function to return the move to use for 128-bit moves, including knowing about the various limitations of quad memory operations. * config/rs6000/vsx.md (vsx_mov): Add support for quad memory operations. Call rs6000_output_move_128bit for the actual instruction(s) to generate. (vsx_movti_64bit): Likewise. * config/rs6000/rs6000.md (UNSPEC_P8V_FMRGOW): New unspec values. (UNSPEC_P8V_MTVSRWZ): Likewise. (UNSPEC_P8V_RELOAD_FROM_GPR): Likewise. (UNSPEC_P8V_MTVSRD): Likewise. (UNSPEC_P8V_XXPERMDI): Likewise. (UNSPEC_P8V_RELOAD_FROM_VSX): Likewise. (UNSPEC_FUSION_GPR): Likewise. (FMOVE128_GPR): New iterator for direct move. (f32_lv): New mode attribute for load/store of SFmode/SDmode values. (f32_sv): Likewise. (f32_dm): Likewise. 
(zero_extenddi2_internal1): Add support for power8 32-bit loads and direct move instructions. (zero_extendsidi2_lfiwzx): Likewise. (extendsidi2_lfiwax): Likewise. (extendsidi2_nocell): Likewise. (floatsi2_lfiwax): Likewise. (lfiwax): Likewise. (floatunssi2_lfiwzx): Likewise. (lfiwzx): Likewise. (fix_trunc_stfiwx): Likewise. (fixuns_trunc_stfiwx): Likewise. (mov_hardfloat, 32-bit floating point): Likewise. (mov_hardfloat64, 64-bit floating point): Likewise. (parity2_cmpb): Set length/type attr. (unnamed shift right patterns, mov_internal2): Change type attr for 'mr.' to fast_compare. (bpermd_): Change type attr to popcnt. (p8_fmrgow_): New insns for power8 direct move support. (p8_mtvsrwz_1): Likewise. (p8_mtvsrwz_2): Likewise. (reload_fpr_from_gpr): Likewise. (p8_mtvsrd_1): Likewise. (p8_mtvsrd_2): Likewise. (p8_xxpermdi_): Likewise. (reload_vsx_from_gpr): Likewise. (reload_vsx_from_gprsf): Likewise. (p8_mfvsrd_3_): Likewise. (reload_gpr_from_vsx): Likewise. (reload_gpr_from_vsxsf): Likewise. (p8_mfvsrd_4_disf): Likewise. (multi-word GPR splits): Do not split direct moves or quad memory operations. Backport from the trunk 2013-06-06 Michael Meissner Pat Haugen Peter Bergner * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Document new power8 builtins. * config/rs6000/vector.md (and3): Add a clobber/scratch of a condition code register, to allow 128-bit logical operations to be done in the VSX or GPR registers. (nor3): Use the canonical form for nor. (eqv3): Add expanders for power8 xxleqv, xxlnand, xxlorc, vclz*, and vpopcnt* vector instructions. (nand3): Likewise. (orc3): Likewise. (clz2): Likewise. (popcount2): Likewise. * config/rs6000/predicates.md (int_reg_operand): Rework tests so that only the GPRs are recognized. * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add support for new power8 builtins. * config/rs6000/rs6000-builtin.def (xscvspdpn): Add new power8 builtin functions. (xscvdpspn): Likewise. (vclz): Likewise. (vclzb): Likewise. 
(vclzh): Likewise. (vclzw): Likewise. (vclzd): Likewise. (vpopcnt): Likewise. (vpopcntb): Likewise. (vpopcnth): Likewise. (vpopcntw): Likewise. (vpopcntd): Likewise. (vgbbd): Likewise. (vmrgew): Likewise. (vmrgow): Likewise. (eqv): Likewise. (eqv_v16qi3): Likewise. (eqv_v8hi3): Likewise. (eqv_v4si3): Likewise. (eqv_v2di3): Likewise. (eqv_v4sf3): Likewise. (eqv_v2df3): Likewise. (nand): Likewise. (nand_v16qi3): Likewise. (nand_v8hi3): Likewise. (nand_v4si3): Likewise. (nand_v2di3): Likewise. (nand_v4sf3): Likewise. (nand_v2df3): Likewise. (orc): Likewise. (orc_v16qi3): Likewise. (orc_v8hi3): Likewise. (orc_v4si3): Likewise. (orc_v2di3): Likewise. (orc_v4sf3): Likewise. (orc_v2df3): Likewise. * config/rs6000/rs6000.c (rs6000_option_override_internal): Only allow power8 quad mode in 64-bit. (rs6000_builtin_vectorized_function): Add support to vectorize ISA 2.07 count leading zeros, population count builtins. (rs6000_expand_vector_init): On ISA 2.07 use xscvdpspn to form V4SF vectors instead of xscvdpsp to avoid IEEE related traps. (builtin_function_type): Add vgbbd builtin function which takes an unsigned argument. (altivec_expand_vec_perm_const): Add support for new power8 merge instructions. * config/rs6000/vsx.md (VSX_L2): New iterator for 128-bit types, that does not include TImode for use with 32-bit. (UNSPEC_VSX_CVSPDPN): Support for power8 xscvdpspn and xscvspdpn instructions. (UNSPEC_VSX_CVDPSPN): Likewise. (vsx_xscvdpspn): Likewise. (vsx_xscvspdpn): Likewise. (vsx_xscvdpspn_scalar): Likewise. (vsx_xscvspdpn_directmove): Likewise. (vsx_and3): Split logical operations into 32-bit and 64-bit. Add support to do logical operations on TImode as well as VSX vector types. Allow logical operations to be done in either VSX registers or in general purpose registers in 64-bit mode. Add splitters if GPRs were used. For AND, add clobber of CCmode to allow use of ANDI on GPRs. Rewrite nor to use the canonical RTL encoding. (vsx_and3_32bit): Likewise. 
(vsx_and3_64bit): Likewise. (vsx_ior3): Likewise. (vsx_ior3_32bit): Likewise. (vsx_ior3_64bit): Likewise. (vsx_xor3): Likewise. (vsx_xor3_32bit): Likewise. (vsx_xor3_64bit): Likewise. (vsx_one_cmpl2): Likewise. (vsx_one_cmpl2_32bit): Likewise. (vsx_one_cmpl2_64bit): Likewise. (vsx_nor3): Likewise. (vsx_nor3_32bit): Likewise. (vsx_nor3_64bit): Likewise. (vsx_andc3): Likewise. (vsx_andc3_32bit): Likewise. (vsx_andc3_64bit): Likewise. (vsx_eqv3_32bit): Add support for power8 xxleqv, xxlnand, and xxlorc instructions. (vsx_eqv3_64bit): Likewise. (vsx_nand3_32bit): Likewise. (vsx_nand3_64bit): Likewise. (vsx_orc3_32bit): Likewise. (vsx_orc3_64bit): Likewise. * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Update comment. * config/rs6000/altivec.md (UNSPEC_VGBBD): Add power8 vgbbd instruction. (p8_vmrgew): Add power8 vmrgew and vmrgow instructions. (p8_vmrgow): Likewise. (altivec_and3): Add clobber of CCmode to allow AND using GPRs to be split under VSX. (p8v_clz2): Add power8 count leading zero support. (p8v_popcount2): Add power8 population count support. (p8v_vgbbd): Add power8 gather bits by bytes by doubleword support. * config/rs6000/rs6000.md (eqv3): Add support for power8 eqv instruction. * config/rs6000/altivec.h (vec_eqv): Add defines to export power8 builtin functions. (vec_nand): Likewise. (vec_vclz): Likewise. (vec_vclzb): Likewise. (vec_vclzd): Likewise. (vec_vclzh): Likewise. (vec_vclzw): Likewise. (vec_vgbbd): Likewise. (vec_vmrgew): Likewise. (vec_vmrgow): Likewise. (vec_vpopcnt): Likewise. (vec_vpopcntb): Likewise. (vec_vpopcntd): Likewise. (vec_vpopcnth): Likewise. (vec_vpopcntw): Likewise. Backport from trunk 2013-05-29 Michael Meissner Pat Haugen Peter Bergner * config/rs6000/vector.md (VEC_I): Add support for new power8 V2DI instructions. (VEC_A): Likewise. (VEC_C): Likewise. (vrotl3): Likewise. (vashl3): Likewise. (vlshr3): Likewise. (vashr3): Likewise. * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add support for power8 V2DI builtins. 
* config/rs6000/rs6000-builtin.def (abs_v2di): Add support for power8 V2DI builtins. (vupkhsw): Likewise. (vupklsw): Likewise. (vaddudm): Likewise. (vminsd): Likewise. (vmaxsd): Likewise. (vminud): Likewise. (vmaxud): Likewise. (vpkudum): Likewise. (vpksdss): Likewise. (vpkudus): Likewise. (vpksdus): Likewise. (vrld): Likewise. (vsld): Likewise. (vsrd): Likewise. (vsrad): Likewise. (vsubudm): Likewise. (vcmpequd): Likewise. (vcmpgtsd): Likewise. (vcmpgtud): Likewise. (vcmpequd_p): Likewise. (vcmpgtsd_p): Likewise. (vcmpgtud_p): Likewise. (vupkhsw): Likewise. (vupklsw): Likewise. (vaddudm): Likewise. (vmaxsd): Likewise. (vmaxud): Likewise. (vminsd): Likewise. (vminud): Likewise. (vpksdss): Likewise. (vpksdus): Likewise. (vpkudum): Likewise. (vpkudus): Likewise. (vrld): Likewise. (vsld): Likewise. (vsrad): Likewise. (vsrd): Likewise. (vsubudm): Likewise. * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Add support for power8 V2DI instructions. * config/rs6000/altivec.md (UNSPEC_VPKUHUM): Add support for power8 V2DI instructions. Combine pack and unpack insns to use an iterator for each mode. Check whether a particular mode supports Altivec instructions instead of just checking TARGET_ALTIVEC. (UNSPEC_VPKUWUM): Likewise. (UNSPEC_VPKSHSS): Likewise. (UNSPEC_VPKSWSS): Likewise. (UNSPEC_VPKUHUS): Likewise. (UNSPEC_VPKSHUS): Likewise. (UNSPEC_VPKUWUS): Likewise. (UNSPEC_VPKSWUS): Likewise. (UNSPEC_VPACK_SIGN_SIGN_SAT): Likewise. (UNSPEC_VPACK_SIGN_UNS_SAT): Likewise. (UNSPEC_VPACK_UNS_UNS_SAT): Likewise. (UNSPEC_VPACK_UNS_UNS_MOD): Likewise. (UNSPEC_VUPKHSB): Likewise. (UNSPEC_VUNPACK_HI_SIGN): Likewise. (UNSPEC_VUNPACK_LO_SIGN): Likewise. (UNSPEC_VUPKHSH): Likewise. (UNSPEC_VUPKLSB): Likewise. (UNSPEC_VUPKLSH): Likewise. (VI2): Likewise. (VI_char): Likewise. (VI_scalar): Likewise. (VI_unit): Likewise. (VP): Likewise. (VP_small): Likewise. (VP_small_lc): Likewise. (VU_char): Likewise. (add3): Likewise. (altivec_vaddcuw): Likewise. (altivec_vaddus): Likewise. 
(altivec_vaddss): Likewise. (sub3): Likewise. (altivec_vsubcuw): Likewise. (altivec_vsubus): Likewise. (altivec_vsubss): Likewise. (altivec_vavgs): Likewise. (altivec_vcmpbfp): Likewise. (altivec_eq): Likewise. (altivec_gt): Likewise. (altivec_gtu): Likewise. (umax3): Likewise. (smax3): Likewise. (umin3): Likewise. (smin3): Likewise. (altivec_vpkuhum): Likewise. (altivec_vpkuwum): Likewise. (altivec_vpkshss): Likewise. (altivec_vpkswss): Likewise. (altivec_vpkuhus): Likewise. (altivec_vpkshus): Likewise. (altivec_vpkuwus): Likewise. (altivec_vpkswus): Likewise. (altivec_vpksss): Likewise. (altivec_vpksus): Likewise. (altivec_vpkuus): Likewise. (altivec_vpkuum): Likewise. (altivec_vrl): Likewise. (altivec_vsl): Likewise. (altivec_vsr): Likewise. (altivec_vsra): Likewise. (altivec_vsldoi_): Likewise. (altivec_vupkhsb): Likewise. (altivec_vupkhs): Likewise. (altivec_vupkls): Likewise. (altivec_vupkhsh): Likewise. (altivec_vupklsb): Likewise. (altivec_vupklsh): Likewise. (altivec_vcmpequ_p): Likewise. (altivec_vcmpgts_p): Likewise. (altivec_vcmpgtu_p): Likewise. (abs2): Likewise. (vec_unpacks_hi_v16qi): Likewise. (vec_unpacks_hi_v8hi): Likewise. (vec_unpacks_lo_v16qi): Likewise. (vec_unpacks_hi_): Likewise. (vec_unpacks_lo_v8hi): Likewise. (vec_unpacks_lo_): Likewise. (vec_pack_trunc_v8h): Likewise. (vec_pack_trunc_v4si): Likewise. (vec_pack_trunc_): Likewise. * config/rs6000/altivec.h (vec_vaddudm): Add defines for power8 V2DI builtins. (vec_vmaxsd): Likewise. (vec_vmaxud): Likewise. (vec_vminsd): Likewise. (vec_vminud): Likewise. (vec_vpksdss): Likewise. (vec_vpksdus): Likewise. (vec_vpkudum): Likewise. (vec_vpkudus): Likewise. (vec_vrld): Likewise. (vec_vsld): Likewise. (vec_vsrad): Likewise. (vec_vsrd): Likewise. (vec_vsubudm): Likewise. (vec_vupkhsw): Likewise. (vec_vupklsw): Likewise. 2013-05-22 Michael Meissner Pat Haugen Peter Bergner * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Add documentation for the power8 crypto builtins. 
* config/rs6000/t-rs6000 (MD_INCLUDES): Add crypto.md. * config/rs6000/rs6000-builtin.def (BU_P8V_AV_1): Add support macros for defining power8 builtin functions. (BU_P8V_AV_2): Likewise. (BU_P8V_AV_P): Likewise. (BU_P8V_VSX_1): Likewise. (BU_P8V_OVERLOAD_1): Likewise. (BU_P8V_OVERLOAD_2): Likewise. (BU_CRYPTO_1): Likewise. (BU_CRYPTO_2): Likewise. (BU_CRYPTO_3): Likewise. (BU_CRYPTO_OVERLOAD_1): Likewise. (BU_CRYPTO_OVERLOAD_2): Likewise. (XSCVSPDP): Fix typo, point to the correct instruction. (VCIPHER): Add power8 crypto builtins. (VCIPHERLAST): Likewise. (VNCIPHER): Likewise. (VNCIPHERLAST): Likewise. (VPMSUMB): Likewise. (VPMSUMH): Likewise. (VPMSUMW): Likewise. (VPERMXOR_V2DI): Likewise. (VPERMXOR_V4SI: Likewise. (VPERMXOR_V8HI: Likewise. (VPERMXOR_V16QI: Likewise. (VSHASIGMAW): Likewise. (VSHASIGMAD): Likewise. (VPMSUM): Likewise. (VPERMXOR): Likewise. (VSHASIGMA): Likewise. * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define __CRYPTO__ if the crypto instructions are available. (altivec_overloaded_builtins): Add support for overloaded power8 builtins. * config/rs6000/rs6000.c (rs6000_expand_ternop_builtin): Add support for power8 crypto builtins. (builtin_function_type): Likewise. (altivec_init_builtins): Add support for builtins that take vector long long (V2DI) arguments. * config/rs6000/crypto.md: New file, define power8 crypto instructions. 2013-05-22 Michael Meissner Pat Haugen Peter Bergner * doc/invoke.texi (Option Summary): Add power8 options. (RS/6000 and PowerPC Options): Likewise. * doc/md.texi (PowerPC and IBM RS6000 constraints): Update to use constraints.md instead of rs6000.h. Reorder w* constraints. Add wm, wn, wr documentation. * gcc/config/rs6000/constraints.md (wm): New constraint for VSX registers if direct move instructions are enabled. (wn): New constraint for no registers. (wq): New constraint for quad word even GPR registers. (wr): New constraint if 64-bit instructions are enabled. 
(wv): New constraint if power8 vector instructions are enabled. (wQ): New constraint for quad word memory locations. * gcc/config/rs6000/predicates.md (const_0_to_15_operand): New constraint for 0..15 for crypto instructions. (gpc_reg_operand): If VSX allow registers in VSX registers as well as GPR and floating point registers. (int_reg_operand): New predicate to match only GPR registers. (base_reg_operand): New predicate to match base registers. (quad_int_reg_operand): New predicate to match even GPR registers for quad memory operations. (vsx_reg_or_cint_operand): New predicate to allow vector logical operations in both GPR and VSX registers. (quad_memory_operand): New predicate for quad memory operations. (reg_or_indexed_operand): New predicate for direct move support. * gcc/config/rs6000/rs6000-cpus.def (ISA_2_5_MASKS_EMBEDDED): Inherit from ISA_2_4_MASKS, not ISA_2_2_MASKS. (ISA_2_7_MASKS_SERVER): New mask for ISA 2.07 (i.e. power8). (POWERPC_MASKS): Add power8 options. (power8 cpu): Use ISA_2_7_MASKS_SERVER instead of specifying the various options. * gcc/config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define _ARCH_PWR8 and __POWER8_VECTOR__ for power8. * gcc/config/rs6000/rs6000.opt (-mvsx-timode): Add documentation. (-mpower8-fusion): New power8 options. (-mpower8-fusion-sign): Likewise. (-mpower8-vector): Likewise. (-mcrypto): Likewise. (-mdirect-move): Likewise. (-mquad-memory): Likewise. * gcc/config/rs6000/rs6000.c (power8_cost): Initial definition for power8. (rs6000_hard_regno_mode_ok): Make PTImode only match even GPR registers. (rs6000_debug_reg_print): Print the base register class if -mdebug=reg. (rs6000_debug_vector_unit): Add p8_vector. (rs6000_debug_reg_global): If -mdebug=reg, print power8 constraint definitions. Also print fusion state. (rs6000_init_hard_regno_mode_ok): Set up power8 constraints. (rs6000_builtin_mask_calculate): Add power8 builtin support. (rs6000_option_override_internal): Add support for power8. 
(rs6000_common_init_builtins): Add debugging for skipped builtins if -mdebug=builtin. (rs6000_adjust_cost): Add power8 support. (rs6000_issue_rate): Likewise. (insn_must_be_first_in_group): Likewise. (insn_must_be_last_in_group): Likewise. (force_new_group): Likewise. (rs6000_register_move_cost): Likewise. (rs6000_opt_masks): Likewise. * config/rs6000/rs6000.h (ASM_CPU_POWER8_SPEC): If we don't have a power8 capable assembler, default to power7 options. (TARGET_DIRECT_MOVE): Likewise. (TARGET_CRYPTO): Likewise. (TARGET_P8_VECTOR): Likewise. (VECTOR_UNIT_P8_VECTOR_P): Define power8 vector support. (VECTOR_UNIT_VSX_OR_P8_VECTOR_P): Likewise. (VECTOR_MEM_P8_VECTOR_P): Likewise. (VECTOR_MEM_VSX_OR_P8_VECTOR_P): Likewise. (VECTOR_MEM_ALTIVEC_OR_VSX_P): Likewise. (TARGET_XSCVDPSPN): Likewise. (TARGET_XSCVSPDPN): Likewise. (TARGET_SYNC_HI_QI): Likewise. (TARGET_SYNC_TI): Likewise. (MASK_CRYPTO): Likewise. (MASK_DIRECT_MOVE): Likewise. (MASK_P8_FUSION): Likewise. (MASK_P8_VECTOR): Likewise. (REG_ALLOC_ORDER): Move fr13 to be lower in priority so that the TFmode temporary used by some of the direct move instructions to get two FP temporary registers does not force creation of a stack frame. (VLOGICAL_REGNO_P): Allow vector logical operations in GPRs. (MODES_TIEABLE_P): Move the VSX tests above the Altivec tests so that any VSX registers are tieable, even if they are also an Altivec vector mode. (r6000_reg_class_enum): Add wm, wr, wv constraints. (RS6000_BTM_P8_VECTOR): Power8 builtin support. (RS6000_BTM_CRYPTO): Likewise. (RS6000_BTM_COMMON): Likewise. * config/rs6000/rs6000.md (cpu attribute): Add power8. * config/rs6000/rs6000-opts.h (PROCESSOR_POWER8): Likewise. (enum rs6000_vector): Add power8 vector support. Backport from mainline 2013-03-20 Pat Haugen * config/rs6000/predicates.md (indexed_address, update_address_mem update_indexed_address_mem): New predicates. * config/rs6000/vsx.md (vsx_extract__zero): Set correct "type" attribute for load/store instructions. 
* config/rs6000/dfp.md (movsd_store): Likewise. (movsd_load): Likewise. * config/rs6000/rs6000.md (zero_extenddi2_internal1): Likewise. (unnamed HI->DI extend define_insn): Likewise. (unnamed SI->DI extend define_insn): Likewise. (unnamed QI->SI extend define_insn): Likewise. (unnamed QI->HI extend define_insn): Likewise. (unnamed HI->SI extend define_insn): Likewise. (unnamed HI->SI extend define_insn): Likewise. (extendsfdf2_fpr): Likewise. (movsi_internal1): Likewise. (movsi_internal1_single): Likewise. (movhi_internal): Likewise. (movqi_internal): Likewise. (movcc_internal1): Correct mnemonic for stw insn. Set correct "type" attribute for load/store instructions. (mov_hardfloat): Set correct "type" attribute for load/store instructions. (mov_softfloat): Likewise. (mov_hardfloat32): Likewise. (mov_hardfloat64): Likewise. (mov_softfloat64): Likewise. (movdi_internal32): Likewise. (movdi_internal64): Likewise. (probe_stack_): Likewise. Backport from mainline 2013-03-20 Michael Meissner * config/rs6000/vector.md (VEC_R): Add 32-bit integer, binary floating point, and decimal floating point to reload iterator. * config/rs6000/constraints.md (wl constraint): New constraints to return FLOAT_REGS if certain options are used to reduce the number of separate patterns that exist in the file. (wx constraint): Likewise. (wz constraint): Likewise. * config/rs6000/rs6000.c (rs6000_debug_reg_global): If -mdebug=reg, print wg, wl, wx, and wz constraints. (rs6000_init_hard_regno_mode_ok): Initialize new constraints. Initialize the reload functions for 64-bit binary/decimal floating point types. (reg_offset_addressing_ok_p): If we are on a power7 or later, use LFIWZX and STFIWX to load/store 32-bit decimal types, and don't create the buffer on the stack to overcome not having a 32-bit load and store. (rs6000_emit_move): Likewise. (rs6000_secondary_memory_needed_rtx): Likewise. (rs6000_alloc_sdmode_stack_slot): Likewise. 
(rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f via xxlxor, just like DFmode 0.0. * config/rs6000/rs6000.h (TARGET_NO_SDMODE_STACK): New macro) (define as 1 if we are running on a power7 or newer. (enum r6000_reg_class_enum): Add new constraints. * config/rs6000/dfp.md (movsd): Delete, combine with binary floating point moves in rs6000.md. Combine power6x (mfpgpr) moves with other moves by using conditional constraits (wg). Use LFIWZX and STFIWX for loading SDmode on power7. Use xxlxor to create 0.0f. (movsd splitter): Likewise. (movsd_hardfloat): Likewise. (movsd_softfloat): Likewise. * config/rs6000/rs6000.md (FMOVE32): New iterators to combine binary and decimal floating point moves. (fmove_ok): New attributes to combine binary and decimal floating point moves, and to combine power6x (mfpgpr) moves along normal floating moves. (real_value_to_target): Likewise. (f32_lr): Likewise. (f32_lm): Likewise. (f32_li): Likewise. (f32_sr): Likewise. (f32_sm): Likewise. (f32_si): Likewise. (movsf): Combine binary and decimal floating point moves. Combine power6x (mfpgpr) moves with other moves by using conditional constraits (wg). Use LFIWZX and STFIWX for loading SDmode on power7. (mov for SFmode/SDmode); Likewise. (SFmode/SDmode splitters): Likewise. (movsf_hardfloat): Likewise. (mov_hardfloat for SFmode/SDmode): Likewise. (movsf_softfloat): Likewise. (mov_softfloat for SFmode/SDmode): Likewise. * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wl) (wx and wz constraints. * config/rs6000/constraints.md (wg constraint): New constraint to return FLOAT_REGS if -mmfpgpr (power6x) was used. * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add wg constraint. * config/rs6000/rs6000.c (rs6000_debug_reg_global): If -mdebug=reg, print wg, wl, wx, and wz constraints. (rs6000_init_hard_regno_mode_ok): Initialize new constraints. Initialize the reload functions for 64-bit binary/decimal floating point types. 
(reg_offset_addressing_ok_p): If we are on a power7 or later, use LFIWZX and STFIWX to load/store 32-bit decimal types, and don't create the buffer on the stack to overcome not having a 32-bit load and store. (rs6000_emit_move): Likewise. (rs6000_secondary_memory_needed_rtx): Likewise. (rs6000_alloc_sdmode_stack_slot): Likewise. (rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f via xxlxor, just like DFmode 0.0. * config/rs6000/dfp.md (movdd): Delete, combine with binary floating point moves in rs6000.md. Combine power6x (mfpgpr) moves with other moves by using conditional constraits (wg). Use LFIWZX and STFIWX for loading SDmode on power7. (movdd splitters): Likewise. (movdd_hardfloat32): Likewise. (movdd_softfloat32): Likewise. (movdd_hardfloat64_mfpgpr): Likewise. (movdd_hardfloat64): Likewise. (movdd_softfloat64): Likewise. * config/rs6000/rs6000.md (FMOVE64): New iterators to combine 64-bit binary and decimal floating point moves. (FMOVE64X): Likewise. (movdf): Combine 64-bit binary and decimal floating point moves. Combine power6x (mfpgpr) moves with other moves by using conditional constraits (wg). (mov for DFmode/DDmode): Likewise. (DFmode/DDmode splitters): Likewise. (movdf_hardfloat32): Likewise. (mov_hardfloat32 for DFmode/DDmode): Likewise. (movdf_softfloat32): Likewise. (movdf_hardfloat64_mfpgpr): Likewise. (movdf_hardfloat64): Likewise. (mov_hardfloat64 for DFmode/DDmode): Likewise. (movdf_softfloat64): Likewise. (mov_softfloat64 for DFmode/DDmode): Likewise. (reload__load): Move to later in the file so they aren't in the middle of the floating point move insns. (reload__store): Likewise. * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wg constraint. * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print out wg constraint if -mdebug=reg. (rs6000_initi_hard_regno_mode_ok): Enable wg constraint if -mfpgpr. Enable using dd reload support if needed. 
* config/rs6000/dfp.md (movtd): Delete, combine with 128-bit binary and decimal floating point moves in rs6000.md. (movtd_internal): Likewise. * config/rs6000/rs6000.md (FMOVE128): Combine 128-bit binary and decimal floating point moves. (movtf): Likewise. (movtf_internal): Likewise. (mov_internal, TDmode/TFmode): Likewise. (movtf_softfloat): Likewise. (mov_softfloat, TDmode/TFmode): Likewise. * config/rs6000/rs6000.md (movdi_mfpgpr): Delete, combine with movdi_internal64, using wg constraint for move direct operations. (movdi_internal64): Likewise. * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print MODES_TIEABLE_P for selected modes. Print the numerical value of the various virtual registers. Use GPR/FPR first/last values) (instead of hard coding the register numbers. Print which modes have reload functions registered. (rs6000_option_override_internal): If -mdebug=reg, trace the options settings before/after setting cpu, target and subtarget settings. (rs6000_secondary_reload_trace): Improve the RTL dump for -mdebug=addr and for secondary reload failures in rs6000_secondary_reload_inner. (rs6000_secondary_reload_fail): Likewise. (rs6000_secondary_reload_inner): Likewise. * config/rs6000/rs6000.md (FIRST_GPR_REGNO): Add convenience macros for first/last GPR and FPR registers. (LAST_GPR_REGNO): Likewise. (FIRST_FPR_REGNO): Likewise. (LAST_FPR_REGNO): Likewise. * config/rs6000/vector.md (mul3): Use the combined macro VECTOR_UNIT_ALTIVEC_OR_VSX_P instead of separate calls to VECTOR_UNIT_ALTIVEC_P and VECTOR_UNIT_VSX_P. (vcond): Likewise. (vcondu): Likewise. (vector_gtu): Likewise. (vector_gte): Likewise. (xor3): Don't allow logical operations on TImode in 32-bit to prevent the compiler from converting DImode operations to TImode. (ior3): Likewise. (and3): Likewise. (one_cmpl2): Likewise. (nor3): Likewise. (andc3): Likewise. * config/rs6000/constraints.md (wt constraint): New constraint that returns VSX_REGS if TImode is allowed in VSX registers. 
* config/rs6000/predicates.md (easy_fp_constant): 0.0f is an easy constant under VSX. * config/rs6000/rs6000-modes.def (PTImode): Define, PTImode is similar to TImode, but it is restricted to being in the GPRs. * config/rs6000/rs6000.opt (-mvsx-timode): New switch to allow TImode to occupy a single VSX register. * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Default to -mvsx-timode for power7/power8. (power7 cpu): Likewise. (power8 cpu): Likewise. * config/rs6000/rs6000.c (rs6000_hard_regno_nregs_internal): Make sure that TFmode/TDmode take up two registers if they are ever allowed in the upper VSX registers. (rs6000_hard_regno_mode_ok): If -mvsx-timode, allow TImode in VSX registers. (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_debug_reg_global): Add debugging for PTImode and wt constraint. Print if LRA is turned on. (rs6000_option_override_internal): Give an error if -mvsx-timode and VSX is not enabled. (invalid_e500_subreg): Handle PTImode, restricting it to GPRs. If -mvsx-timode, restrict TImode to reg+reg addressing, and PTImode to reg+offset addressing. Use PTImode when checking offset addresses for validity. (reg_offset_addressing_ok_p): Likewise. (rs6000_legitimate_offset_address_p): Likewise. (rs6000_legitimize_address): Likewise. (rs6000_legitimize_reload_address): Likewise. (rs6000_legitimate_address_p): Likewise. (rs6000_eliminate_indexed_memrefs): Likewise. (rs6000_emit_move): Likewise. (rs6000_secondary_reload): Likewise. (rs6000_secondary_reload_inner): Handle PTImode. Allow 64-bit reloads to fpr registers to continue to use reg+offset addressing, but 64-bit reloads to altivec registers need reg+reg addressing. Drop test for PRE_MODIFY, since VSX loads/stores no longer support it. Treat LO_SUM like a PLUS operation. (rs6000_secondary_reload_class): If type is 64-bit, prefer to use FLOAT_REGS instead of VSX_REGS to allow use of reg+offset addressing. 
(rs6000_cannot_change_mode_class): Do not allow TImode in VSX registers to share a register with a smaller sized type, since VSX puts scalars in the upper 64-bits. (print_operand): Add support for PTImode. (rs6000_register_move_cost): Use VECTOR_MEM_VSX_P instead of VECTOR_UNIT_VSX_P to catch types that can be loaded in VSX registers, but don't have arithmetic support. (rs6000_memory_move_cost): Add test for VSX. (rs6000_opt_masks): Add -mvsx-timode. * config/rs6000/vsx.md (VSm): Change to use 64-bit aligned moves for TImode. (VSs): Likewise. (VSr): Use wt constraint for TImode. (VSv): Drop TImode support. (vsx_movti): Delete, replace with versions for 32-bit and 64-bit. (vsx_movti_64bit): Likewise. (vsx_movti_32bit): Likewise. (vec_store_): Use VSX iterator instead of vector iterator. (vsx_and3): Delete use of '?' constraint on inputs, just put one '?' on the appropriate output constraint. Do not allow TImode logical operations on 32-bit systems. (vsx_ior3): Likewise. (vsx_xor3): Likewise. (vsx_one_cmpl2): Likewise. (vsx_nor3): Likewise. (vsx_andc3): Likewise. (vsx_concat_): Likewise. (vsx_xxpermdi_): Fix thinko for non V2DF/V2DI modes. * config/rs6000/rs6000.h (MASK_VSX_TIMODE): Map from OPTION_MASK_VSX_TIMODE. (enum rs6000_reg_class_enum): Add RS6000_CONSTRAINT_wt. (STACK_SAVEAREA_MODE): Use PTImode instead of TImode. * config/rs6000/rs6000.md (INT mode attribute): Add PTImode. (TI2 iterator): New iterator for TImode, PTImode. (wd mode attribute): Add values for vector types. (movti_string): Replace TI move operations with operations for TImode and PTImode. Add support for TImode being allowed in VSX registers. (mov_string, TImode/PTImode): Likewise. (movti_ppc64): Likewise. (mov_ppc64, TImode/PTImode): Likewise. (TI mode splitters): Likewise. * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wt constraint. 
[gcc/testsuite] 2014-04-04 Bill Schmidt Backport from mainline 2013-11-22 Michael Meissner PR target/59054 * gcc.target/powerpc/direct-move.h (VSX_REG_ATTR): Allow test to specify an appropriate register class for VSX operations. (load_vsx): Use it. (load_gpr_to_vsx): Likewise. (load_vsx_to_gpr): Likewise. * gcc.target/powerpc/direct-move-vint1.c: Use an appropriate register class for VSX registers that the type can handle. Remove checks for explicit number of instructions generated, just check if the instruction is generated. * gcc.target/powerpc/direct-move-vint2.c: Likewise. * gcc.target/powerpc/direct-move-float1.c: Likewise. * gcc.target/powerpc/direct-move-float2.c: Likewise. * gcc.target/powerpc/direct-move-double1.c: Likewise. * gcc.target/powerpc/direct-move-double2.c: Likewise. * gcc.target/powerpc/direct-move-long1.c: Likewise. * gcc.target/powerpc/direct-move-long2.c: Likewise. * gcc.target/powerpc/bool3-av.c: Limit to 64-bit mode for now. * gcc.target/powerpc/bool3-p7.c: Likewise. * gcc.target/powerpc/bool3-p8.c: Likewise. * gcc.target/powerpc/p8vector-ldst.c: Just check that the appropriate instructions are generated, don't check the count. 2013-11-12 Michael Meissner PR target/59054 * gcc.target/powerpc/pr59054.c: New test. 2013-08-22 Michael Meissner * gcc.target/powerpc/pr57744.c: Declare abort. 2013-07-18 Pat Haugen * gcc.target/powerpc/pr57744.c: Fix typo. Back port from mainline 2013-10-03 Michael Meissner * gcc.target/powerpc/p8vector-fp.c: New test for floating point scalar operations when using -mupper-regs-sf and -mupper-regs-df. * gcc.target/powerpc/ppc-target-1.c: Update tests to allow either VSX scalar operations or the traditional floating point form of the instruction. * gcc.target/powerpc/ppc-target-2.c: Likewise. * gcc.target/powerpc/recip-3.c: Likewise. * gcc.target/powerpc/recip-5.c: Likewise. * gcc.target/powerpc/pr72747.c: Likewise. * gcc.target/powerpc/vsx-builtin-3.c: Likewise. 
Back port from mainline 2013-09-27 Michael Meissner * gcc.target/powerpc/p8vector-ldst.c: New test for -mupper-regs-sf and -mupper-regs-df. Back port from mainline 2013-10-17 Michael Meissner PR target/58673 * gcc.target/powerpc/pr58673-1.c: New file to test whether -mquad-word + -mno-vsx-timode causes errors. * gcc.target/powerpc/pr58673-2.c: Likewise. Backport from trunk. 2013-07-23 Michael Meissner * gcc.target/powerpc/bool2.h: New file, test the code generation of logical operations for power5, altivec, power7, and power8 systems. * gcc.target/powerpc/bool2-p5.c: Likewise. * gcc.target/powerpc/bool2-av.c: Likewise. * gcc.target/powerpc/bool2-p7.c: Likewise. * gcc.target/powerpc/bool2-p8.c: Likewise. * gcc.target/powerpc/bool3.h: Likewise. * gcc.target/powerpc/bool3-av.c: Likewise. * gcc.target/powerpc/bool2-p7.c: Likewise. * gcc.target/powerpc/bool2-p8.c: Likewise. Backport from trunk. 2013-07-31 Michael Meissner * gcc.target/powerpc/fusion.c: New file, test power8 fusion support. Back port from the trunk 2013-06-28 Michael Meissner PR target/57744 * gcc.target/powerpc/pr57744.c: New test to make sure lqarx and stqcx. get even registers. Back port from the trunk 2013-06-12 Michael Meissner Pat Haugen Peter Bergner * gcc.target/powerpc/atomic-p7.c: New file, add tests for atomic load/store instructions on power7, power8. * gcc.target/powerpc/atomic-p8.c: Likewise. Back port from the trunk 2013-06-10 Michael Meissner Pat Haugen Peter Bergner * gcc.target/powerpc/direct-move-vint1.c: New tests for power8 direct move instructions. * gcc.target/powerpc/direct-move-vint2.c: Likewise. * gcc.target/powerpc/direct-move.h: Likewise. * gcc.target/powerpc/direct-move-float1.c: Likewise. * gcc.target/powerpc/direct-move-float2.c: Likewise. * gcc.target/powerpc/direct-move-double1.c: Likewise. * gcc.target/powerpc/direct-move-double2.c: Likewise. * gcc.target/powerpc/direct-move-long1.c: Likewise. * gcc.target/powerpc/direct-move-long2.c: Likewise. 
Backport from the trunk 2013-06-06 Michael Meissner Pat Haugen Peter Bergner * gcc.target/powerpc/p8vector-builtin-1.c: New test to test power8 builtin functions. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-2.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-3.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-4.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-5.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-6.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-builtin-7.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-1.c: New tests to test power8 auto-vectorization. * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-2.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-3.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-4.c: Likewise. * gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-5.c: Likewise. * gcc.target/powerpc/crypto-builtin-1.c: Use effective target powerpc_p8vector_ok instead of powerpc_vsx_ok. * gcc.target/powerpc/bool.c: New file, add eqv, nand, nor tests. * lib/target-supports.exp (check_p8vector_hw_available) Add power8 support. (check_effective_target_powerpc_p8vector_ok): Likewise. (is-effective-target): Likewise. (check_vect_support_and_set_flags): Likewise. Backport from trunk 2013-05-22 Michael Meissner Pat Haugen Peter Bergner * gcc.target/powerpc/crypto-builtin-1.c: New file, test for power8 crypto builtins. Backport from mainline 2013-03-20 Michael Meissner * gcc.target/powerpc/mmfpgpr.c: New test. * gcc.target/powerpc/sd-vsx.c: Likewise. * gcc.target/powerpc/sd-pwr6.c: Likewise. * gcc.target/powerpc/vsx-float0.c: Likewise. 
From-SVN: r209087 --- gcc/ChangeLog | 1438 ++++++++ gcc/config/rs6000/altivec.h | 36 + gcc/config/rs6000/altivec.md | 554 ++-- gcc/config/rs6000/constraints.md | 58 +- gcc/config/rs6000/crypto.md | 101 + gcc/config/rs6000/dfp.md | 315 +- gcc/config/rs6000/driver-rs6000.c | 2 +- gcc/config/rs6000/power8.md | 373 +++ gcc/config/rs6000/predicates.md | 253 ++ gcc/config/rs6000/rs6000-builtin.def | 245 +- gcc/config/rs6000/rs6000-c.c | 573 +++- gcc/config/rs6000/rs6000-cpus.def | 25 +- gcc/config/rs6000/rs6000-modes.def | 5 + gcc/config/rs6000/rs6000-opts.h | 8 +- gcc/config/rs6000/rs6000-protos.h | 7 + gcc/config/rs6000/rs6000.c | 2877 +++++++++++++++-- gcc/config/rs6000/rs6000.h | 139 +- gcc/config/rs6000/rs6000.md | 2627 ++++++++++----- gcc/config/rs6000/rs6000.opt | 47 +- gcc/config/rs6000/sync.md | 166 +- gcc/config/rs6000/t-rs6000 | 2 + gcc/config/rs6000/vector.md | 83 +- gcc/config/rs6000/vsx.md | 517 ++- gcc/doc/extend.texi | 335 ++ gcc/doc/invoke.texi | 49 +- gcc/doc/md.texi | 52 +- gcc/testsuite/ChangeLog | 175 + gcc/testsuite/gcc.target/powerpc/atomic-p7.c | 207 ++ gcc/testsuite/gcc.target/powerpc/atomic-p8.c | 237 ++ gcc/testsuite/gcc.target/powerpc/bool.c | 14 + gcc/testsuite/gcc.target/powerpc/bool2-av.c | 32 + gcc/testsuite/gcc.target/powerpc/bool2-p5.c | 32 + gcc/testsuite/gcc.target/powerpc/bool2-p7.c | 31 + gcc/testsuite/gcc.target/powerpc/bool2-p8.c | 32 + gcc/testsuite/gcc.target/powerpc/bool2.h | 29 + gcc/testsuite/gcc.target/powerpc/bool3-av.c | 37 + gcc/testsuite/gcc.target/powerpc/bool3-p7.c | 37 + gcc/testsuite/gcc.target/powerpc/bool3-p8.c | 36 + gcc/testsuite/gcc.target/powerpc/bool3.h | 186 ++ .../gcc.target/powerpc/crypto-builtin-1.c | 130 + .../gcc.target/powerpc/direct-move-double1.c | 16 + .../gcc.target/powerpc/direct-move-double2.c | 15 + .../gcc.target/powerpc/direct-move-float1.c | 18 + .../gcc.target/powerpc/direct-move-float2.c | 15 + .../gcc.target/powerpc/direct-move-long1.c | 16 + .../gcc.target/powerpc/direct-move-long2.c | 15 + 
.../gcc.target/powerpc/direct-move-vint1.c | 14 + .../gcc.target/powerpc/direct-move-vint2.c | 13 + .../gcc.target/powerpc/direct-move.h | 187 ++ gcc/testsuite/gcc.target/powerpc/fusion.c | 23 + gcc/testsuite/gcc.target/powerpc/mmfpgpr.c | 22 + .../gcc.target/powerpc/p8vector-builtin-1.c | 65 + .../gcc.target/powerpc/p8vector-builtin-2.c | 204 ++ .../gcc.target/powerpc/p8vector-builtin-3.c | 104 + .../gcc.target/powerpc/p8vector-builtin-4.c | 249 ++ .../gcc.target/powerpc/p8vector-builtin-5.c | 105 + .../gcc.target/powerpc/p8vector-builtin-6.c | 10 + .../gcc.target/powerpc/p8vector-builtin-7.c | 32 + .../gcc.target/powerpc/p8vector-fp.c | 139 + .../gcc.target/powerpc/p8vector-ldst.c | 42 + .../gcc.target/powerpc/p8vector-vectorize-1.c | 200 ++ .../gcc.target/powerpc/p8vector-vectorize-2.c | 30 + .../gcc.target/powerpc/p8vector-vectorize-3.c | 29 + .../gcc.target/powerpc/p8vector-vectorize-4.c | 69 + .../gcc.target/powerpc/p8vector-vectorize-5.c | 87 + .../gcc.target/powerpc/ppc-target-1.c | 3 +- .../gcc.target/powerpc/ppc-target-2.c | 3 +- gcc/testsuite/gcc.target/powerpc/pr42747.c | 2 +- gcc/testsuite/gcc.target/powerpc/pr57744.c | 39 + gcc/testsuite/gcc.target/powerpc/pr58673-1.c | 78 + gcc/testsuite/gcc.target/powerpc/pr58673-2.c | 217 ++ gcc/testsuite/gcc.target/powerpc/pr59054.c | 9 + gcc/testsuite/gcc.target/powerpc/recip-3.c | 12 +- gcc/testsuite/gcc.target/powerpc/recip-5.c | 4 +- gcc/testsuite/gcc.target/powerpc/sd-pwr6.c | 19 + gcc/testsuite/gcc.target/powerpc/sd-vsx.c | 20 + .../gcc.target/powerpc/vsx-builtin-3.c | 6 +- gcc/testsuite/gcc.target/powerpc/vsx-float0.c | 16 + gcc/testsuite/lib/target-supports.exp | 59 +- 79 files changed, 12101 insertions(+), 2207 deletions(-) create mode 100644 gcc/config/rs6000/crypto.md create mode 100644 gcc/config/rs6000/power8.md create mode 100644 gcc/testsuite/gcc.target/powerpc/atomic-p7.c create mode 100644 gcc/testsuite/gcc.target/powerpc/atomic-p8.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool.c 
create mode 100644 gcc/testsuite/gcc.target/powerpc/bool2-av.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool2-p5.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool2-p7.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool2-p8.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool2.h create mode 100644 gcc/testsuite/gcc.target/powerpc/bool3-av.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool3-p7.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool3-p8.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bool3.h create mode 100644 gcc/testsuite/gcc.target/powerpc/crypto-builtin-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-double1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-double2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-float1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-float2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-long1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-long2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move.h create mode 100644 gcc/testsuite/gcc.target/powerpc/fusion.c create mode 100644 gcc/testsuite/gcc.target/powerpc/mmfpgpr.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-5.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-6.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-builtin-7.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-fp.c create mode 100644 
gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p8vector-vectorize-5.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr57744.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr58673-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr58673-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr59054.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sd-pwr6.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sd-vsx.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-float0.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 700a17972837..d9b041af5719 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,1441 @@ +2014-04-04 Bill Schmidt + + Power8 Base Support + Apply mainline + 2013-11-23 Alan Modra + * config/rs6000/vsx.md (fusion peepholes): Disable when !TARGET_VSX. + + Backport from mainline + 2013-11-12 Michael Meissner + + PR target/59054 + * config/rs6000/rs6000.md (movdi_internal32): Eliminate + constraints that would allow DImode into the traditional Altivec + registers, but cause undesirable code generation when loading 0 as + a constant. + (movdi_internal64): Likewise. + (cmp_fpr): Do not use %x for CR register output. + (extendsfdf2_fpr): Fix constraints when -mallow-upper-df and + -mallow-upper-sf debug switches are used. + + Backport from mainline + 2013-10-17 Michael Meissner + + * config/rs6000/rs6000.c (enum rs6000_reload_reg_type): Add new + fields to the reg_addr array that describes the valid addressing + mode for any register, general purpose registers, floating point + registers, and Altivec registers. + (FIRST_RELOAD_REG_CLASS): Likewise. 
+ (LAST_RELOAD_REG_CLASS): Likewise. + (struct reload_reg_map_type): Likewise. + (reload_reg_map_type): Likewise. + (RELOAD_REG_VALID): Likewise. + (RELOAD_REG_MULTIPLE): Likewise. + (RELOAD_REG_INDEXED): Likewise. + (RELOAD_REG_OFFSET): Likewise. + (RELOAD_REG_PRE_INCDEC): Likewise. + (RELOAD_REG_PRE_MODIFY): Likewise. + (reg_addr): Likewise. + (mode_supports_pre_incdec_p): New helper functions to say whether + a given mode supports PRE_INC, PRE_DEC, and PRE_MODIFY. + (mode_supports_pre_modify_p): Likewise. + (rs6000_debug_vector_unit): Rearrange the -mdebug=reg output to + print the valid address mode bits for each mode. + (rs6000_debug_print_mode): Likewise. + (rs6000_debug_reg_global): Likewise. + (rs6000_setup_reg_addr_masks): New function to set up the address + mask bits for each type. + (rs6000_init_hard_regno_mode_ok): Use memset to clear arrays. + Call rs6000_setup_reg_addr_masks to set up the address mask bits. + (rs6000_legitimate_address_p): Use mode_supports_pre_incdec_p and + mode_supports_pre_modify_p to determine if PRE_INC, PRE_DEC, and + PRE_MODIFY are supported. + (rs6000_output_move_128bit): Change to use {src,dest}_vmx_p for altivec + registers, instead of {src,dest}_av_p. + (rs6000_print_options_internal): Tweak the debug output slightly. + + Backport from mainline + 2013-10-03 Michael Meissner + + * config/rs6000/rs6000-builtin.def (XSRDPIM): Use floatdf2, + ceildf2, btruncdf2, instead of vsx_* name. + + * config/rs6000/vsx.md (vsx_add3): Change arithmetic + iterators to only do V2DF and V4SF here. Move the DF code to + rs6000.md where it is combined with SF mode. Replace with + just 'v' since only vector operations are handled with these insns + after moving the DF support to rs6000.md. + (vsx_sub3): Likewise. + (vsx_mul3): Likewise. + (vsx_div3): Likewise. + (vsx_fre2): Likewise. + (vsx_neg2): Likewise. + (vsx_abs2): Likewise. + (vsx_nabs2): Likewise. + (vsx_smax3): Likewise. + (vsx_smin3): Likewise. + (vsx_sqrt2): Likewise. 
+ (vsx_rsqrte2): Likewise. + (vsx_fms4): Likewise. + (vsx_nfma4): Likewise. + (vsx_copysign3): Likewise. + (vsx_btrunc2): Likewise. + (vsx_floor2): Likewise. + (vsx_ceil2): Likewise. + (vsx_smaxsf3): Delete scalar ops that were moved to rs6000.md. + (vsx_sminsf3): Likewise. + (vsx_fmadf4): Likewise. + (vsx_fmsdf4): Likewise. + (vsx_nfmadf4): Likewise. + (vsx_nfmsdf4): Likewise. + (vsx_cmpdf_internal1): Likewise. + + * config/rs6000/rs6000.h (TARGET_SF_SPE): Define macros to make it + simpler to select whether a target has SPE or traditional floating + point support in iterators. + (TARGET_DF_SPE): Likewise. + (TARGET_SF_FPR): Likewise. + (TARGET_DF_FPR): Likewise. + (TARGET_SF_INSN): Macros to say whether floating point support + exists for a given operation for expanders. + (TARGET_DF_INSN): Likewise. + + * config/rs6000/rs6000.c (Ftrad): New mode attributes to allow + combining of SF/DF mode operations, using both traditional and VSX + registers. + (Fvsx): Likewise. + (Ff): Likewise. + (Fv): Likewise. + (Fs): Likewise. + (Ffre): Likewise. + (FFRE): Likewise. + (abs2): Combine SF/DF modes using traditional floating point + instructions. Add support for using the upper DF registers with + VSX support, and SF registers with power8-vector support. Update + expanders for operations supported by both the SPE and traditional + floating point units. + (abs2_fpr): Likewise. + (nabs2): Likewise. + (nabs2_fpr): Likewise. + (neg2): Likewise. + (neg2_fpr): Likewise. + (add3): Likewise. + (add3_fpr): Likewise. + (sub3): Likewise. + (sub3_fpr): Likewise. + (mul3): Likewise. + (mul3_fpr): Likewise. + (div3): Likewise. + (div3_fpr): Likewise. + (sqrt3): Likewise. + (sqrt3_fpr): Likewise. + (fre): Likewise. + (rsqrt2): Likewise. + (cmp_fpr): Likewise. + (smax3): Likewise. + (smin3): Likewise. + (smax3_vsx): Likewise. + (smin3_vsx): Likewise. + (negsf2): Delete SF operations that are merged with DF. + (abssf2): Likewise. + (addsf3): Likewise. + (subsf3): Likewise. 
+ (mulsf3): Likewise. + (divsf3): Likewise. + (fres): Likewise. + (fmasf4_fpr): Likewise. + (fmssf4_fpr): Likewise. + (nfmasf4_fpr): Likewise. + (nfmssf4_fpr): Likewise. + (sqrtsf2): Likewise. + (rsqrtsf_internal1): Likewise. + (smaxsf3): Likewise. + (sminsf3): Likewise. + (cmpsf_internal1): Likewise. + (copysign3_fcpsgn): Add VSX/power8-vector support. + (negdf2): Delete DF operations that are merged with SF. + (absdf2): Likewise. + (nabsdf2): Likewise. + (adddf3): Likewise. + (subdf3): Likewise. + (muldf3): Likewise. + (divdf3): Likewise. + (fred): Likewise. + (rsqrtdf_internal1): Likewise. + (fmadf4_fpr): Likewise. + (fmsdf4_fpr): Likewise. + (nfmadf4_fpr): Likewise. + (nfmsdf4_fpr): Likewise. + (sqrtdf2): Likewise. + (smaxdf3): Likewise. + (smindf3): Likewise. + (cmpdf_internal1): Likewise. + (lrintdi2): Use TARGET__FPR macro. + (btrunc2): Delete separate expander, and combine with the + insn and add VSX instruction support. Use TARGET__FPR. + (btrunc2_fpr): Likewise. + (ceil2): Likewise. + (ceil2_fpr): Likewise. + (floor2): Likewise. + (floor2_fpr): Likewise. + (fma4_fpr): Combine SF and DF fused multiply/add support. + Add support for using the upper registers with VSX and + power8-vector. Move insns to be closer to the define_expands. On + VSX systems, prefer the traditional form of FMA over the VSX + version, since the traditional form allows the target not to + overlap with the inputs. + (fms4_fpr): Likewise. + (nfma4_fpr): Likewise. + (nfms4_fpr): Likewise. + + Backport from mainline + 2013-09-27 Michael Meissner + + * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Allow + DFmode, DImode, and SFmode in the upper VSX registers based on the + -mupper-regs-{df,sf} flags. Fix wu constraint to be ALTIVEC_REGS + if -mpower8-vector. Combine -mvsx-timode handling with the rest + of the VSX register handling. + + * config/rs6000/rs6000.md (f32_lv): Use %x0 for VSX regsters. + (f32_sv): Likewise. 
+ (zero_extendsidi2_lfiwzx): Add support for loading into the + Altivec registers with -mpower8-vector. Use wu/wv constraints to + only do VSX memory options on Altivec registers. + (extendsidi2_lfiwax): Likewise. + (extendsfdf2_fpr): Likewise. + (mov_hardfloat, SF/SD modes): Likewise. + (mov_hardfloat32, DF/DD modes): Likewise. + (mov_hardfloat64, DF/DD modes): Likewise. + (movdi_internal64): Likewise. + + Backport from mainline + 2013-09-23 Michael Meissner + + * config/rs6000/rs6000.c (rs6000_vector_reload): Delete, combine + reload helper function arrays into a single array reg_addr. + (reload_fpr_gpr): Likewise. + (reload_gpr_vsx): Likewise. + (reload_vsx_gpr): Likewise. + (struct rs6000_reg_addr): Likewise. + (reg_addr): Likewise. + (rs6000_debug_reg_global): Change rs6000_vector_reload, + reload_fpr_gpr, reload_gpr_vsx, reload_vsx_gpr uses to reg_addr. + (rs6000_init_hard_regno_mode_ok): Likewise. + (rs6000_secondary_reload_direct_move): Likewise. + (rs6000_secondary_reload): Likewise. + + * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add new + constraints: wu, ww, and wy. Repurpose wv constraint added during + power8 changes. Put wg constraint in alphabetical order. + + * config/rs6000/rs6000.opt (-mvsx-scalar-float): New debug switch + for future work to add ISA 2.07 VSX single precision support. + (-mvsx-scalar-double): Change default from -1 to 1, update + documentation comment. + (-mvsx-scalar-memory): Rename debug switch to -mupper-regs-df. + (-mupper-regs-df): New debug switch to control whether DF values + can go in the traditional Altivec registers. + (-mupper-regs-sf): New debug switch to control whether SF values + can go in the traditional Altivec registers. + + * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print wu, ww, + and wy constraints. + (rs6000_init_hard_regno_mode_ok): Use ssize_t instead of int for + loop variables. Rename -mvsx-scalar-memory to -mupper-regs-df. + Add new constraints, wu/ww/wy. Repurpose wv constraint. 
+ (rs6000_debug_legitimate_address_p): Print if we are running + before, during, or after reload. + (rs6000_secondary_reload): Add a comment. + (rs6000_opt_masks): Add -mupper-regs-df, -mupper-regs-sf. + + * config/rs6000/constraints.md (wa constraint): Sort w + constraints. Update documentation string. + (wd constraint): Likewise. + (wf constraint): Likewise. + (wg constraint): Likewise. + (wn constraint): Likewise. + (ws constraint): Likewise. + (wt constraint): Likewise. + (wx constraint): Likewise. + (wz constraint): Likewise. + (wu constraint): New constraint for ISA 2.07 SFmode scalar + instructions. + (ww constraint): Likewise. + (wy constraint): Likewise. + (wv constraint): Repurpose ISA 2.07 constraint that did not use in + the previous submissions. + * doc/md.texi (PowerPC and IBM RS6000): Likewise. + + Backport from mainline + 2013-10-17 Michael Meissner + + PR target/58673 + * config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only + restrict TImode addresses to single indirect registers if both + -mquad-memory and -mvsx-timode are used. + (rs6000_output_move_128bit): Use quad_load_store_p to determine if + we should emit load/store quad. Remove using %y for quad memory + addresses. + + * config/rs6000/rs6000.md (mov_ppc64, TI/PTImode): Add + constraints to allow load/store quad on machines where TImode is + not allowed in VSX registers. Use 'n' instead of 'F' constraint + for TImode to load integer constants. + + Backport from mainline + 2013-10-02 Michael Meissner + + PR target/58587 + * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Turn off + setting -mvsx-timode by default until the underlying problem is + fixed. + (RS6000_CPU, power7 defaults): Likewise. + + Backport from trunk + 2013-08-16 Michael Meissner + + PR target/58160 + * config/rs6000/predicates.md (fusion_gpr_mem_load): Allow the + memory rtx to contain ZERO_EXTEND and SIGN_EXTEND. 
+ + * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Pass operands + array instead of each individual operand as a separate argument. + (emit_fusion_gpr_load): Likewise. + (expand_fusion_gpr_load): Add new function declaration. + + * config/rs6000/rs6000.c (fusion_gpr_load_p): Change the calling + signature to have the operands passed as an array, instead of as + separate arguments. Allow ZERO_EXTEND to be in the memory + address, and also SIGN_EXTEND if -mpower8-fusion-sign. Do not + depend on the register live/dead flags when peepholes are run. + (expand_fusion_gpr_load): New function to be called from the + peephole2 pass, to change the register that addis sets to be the + target register. + (emit_fusion_gpr_load): Change the calling signature to have the + operands passed as an array, instead of as separate arguments. + Allow ZERO_EXTEND to be in the memory address, and also + SIGN_EXTEND if -mpower8-fusion-sign. + + * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): Delete unused + unspec enumeration. + (power8 fusion peephole/peephole2): Rework the fusion peepholes to + adjust the register addis loads up in the peephole2 pass. Do not + depend on the register live/dead state when the peephole pass is + done. + + Backport from trunk + 2013-07-23 Michael Meissner + + * config/rs6000/vector.md (xor3): Move 128-bit boolean + expanders to rs6000.md. + (ior3): Likewise. + (and3): Likewise. + (one_cmpl2): Likewise. + (nor3): Likewise. + (andc3): Likewise. + (eqv3): Likewise. + (nand3): Likewise. + (orc3): Likewise. + + * config/rs6000/rs6000-protos.h (rs6000_split_logical): New + declaration. + + * config/rs6000/rs6000.c (rs6000_split_logical_inner): Add support + to split multi-word logical operations. + (rs6000_split_logical_di): Likewise. + (rs6000_split_logical): Likewise. + + * config/rs6000/vsx.md (VSX_L2): Delete, no longer used. + (vsx_and3_32bit): Move 128-bit logical insns to rs6000.md, + and allow TImode operations in 32-bit. + (vsx_and3_64bit): Likewise. 
+ (vsx_ior3_32bit): Likewise. + (vsx_ior3_64bit): Likewise. + (vsx_xor3_32bit): Likewise. + (vsx_xor3_64bit): Likewise. + (vsx_one_cmpl2_32bit): Likewise. + (vsx_one_cmpl2_64bit): Likewise. + (vsx_nor3_32bit): Likewise. + (vsx_nor3_64bit): Likewise. + (vsx_andc3_32bit): Likewise. + (vsx_andc3_64bit): Likewise. + (vsx_eqv3_32bit): Likewise. + (vsx_eqv3_64bit): Likewise. + (vsx_nand3_32bit): Likewise. + (vsx_nand3_64bit): Likewise. + (vsx_orc3_32bit): Likewise. + (vsx_orc3_64bit): Likewise. + + * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Always allow vector + logical types in GPRs. + + * config/rs6000/altivec.md (altivec_and3): Move 128-bit + logical insns to rs6000.md, and allow TImode operations in + 32-bit. + (altivec_ior3): Likewise. + (altivec_xor3): Likewise. + (altivec_one_cmpl2): Likewise. + (altivec_nor3): Likewise. + (altivec_andc3): Likewise. + + * config/rs6000/rs6000.md (BOOL_128): New mode iterators and mode + attributes for moving the 128-bit logical operations into + rs6000.md. + (BOOL_REGS_OUTPUT): Likewise. + (BOOL_REGS_OP1): Likewise. + (BOOL_REGS_OP2): Likewise. + (BOOL_REGS_UNARY): Likewise. + (BOOL_REGS_AND_CR0): Likewise. + (one_cmpl2): Add support for DI logical operations on + 32-bit, splitting the operations to 32-bit. + (anddi3): Likewise. + (iordi3): Likewise. + (xordi3): Likewise. + (and3, 128-bit types): Rewrite 2013-06-06 logical operator + changes to combine the 32/64-bit code, allow logical operations on + TI mode in 32-bit, and to use similar match_operator patterns like + scalar mode uses. Combine the Altivec and VSX code for logical + operations, and move it here. + (ior3, 128-bit types): Likewise. + (xor3, 128-bit types): Likewise. + (one_cmpl3, 128-bit types): Likewise. + (nor3, 128-bit types): Likewise. + (andc3, 128-bit types): Likewise. + (eqv3, 128-bit types): Likewise. + (nand3, 128-bit types): Likewise. + (orc3, 128-bit types): Likewise. + (and3_internal): Likewise. + (bool3_internal): Likewise. 
+ (boolc3_internal1): Likewise. + (boolc3_internal2): Likewise. + (boolcc3_internal1): Likewise. + (boolcc3_internal2): Likewise. + (eqv3_internal1): Likewise. + (eqv3_internal2): Likewise. + (one_cmpl13_internal): Likewise. + + Back port from mainline: + 2013-06-06 Michael Meissner + Pat Haugen + Peter Bergner + + * lib/target-supports.exp (check_p8vector_hw_available): Add power8 + support. + (check_effective_target_powerpc_p8vector_ok): Likewise. + (is-effective-target): Likewise. + (check_vect_support_and_set_flags): Likewise. + + Backport from mainline + 2013-07-31 Michael Meissner + + * config/rs6000/predicates.md (fusion_gpr_addis): New predicates + to support power8 load fusion. + (fusion_gpr_mem_load): Likewise. + + * config/rs6000/rs6000-modes.def (PTImode): Update a comment. + + * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New + declarations for power8 load fusion. + (emit_fusion_gpr_load): Likewise. + + * config/rs6000/rs6000.c (rs6000_option_override_internal): If + tuning for power8, turn on fusion mode by default. Turn on sign + extending fusion mode if normal fusion mode is on, and we are at + -O2 or -O3. + (fusion_gpr_load_p): New function, return true if we can fuse an + addis instruction with a dependent load to a GPR. + (emit_fusion_gpr_load): Emit the instructions for power8 load + fusion to GPRs. + + * config/rs6000/vsx.md (VSX_M2): New iterator for fusion + peepholes. + (VSX load fusion peepholes): New peepholes to fuse together an + addi instruction with a VSX load instruction. + + * config/rs6000/rs6000.md (GPR load fusion peepholes): New + peepholes to fuse an addis instruction with a load to a GPR base + register. If we are supporting sign extending fusions, convert + sign extending loads to zero extending loads and add an explicit + sign extension. + + Backport from mainline + 2013-07-18 Pat Haugen + + * config/rs6000/rs6000.c (rs6000_option_override_internal): Adjust flag + interaction for new Power8 flags and VSX.
+ + Back port from the trunk + 2013-06-28 Michael Meissner + + PR target/57744 + * config/rs6000/rs6000.h (MODES_TIEABLE_P): Do not allow PTImode + to tie with any other modes. Eliminate Altivec vector mode tests, + since these are a subset of ALTIVEC or VSX vector modes. Simplify + code, to return 0 if testing MODE2 for a condition, if we've + already tested MODE1 for the same condition. + + Backport from mainline + 2013-06-28 Pat Haugen + + * config/rs6000/rs6000.md (define_insn ""): Fix insn type. + + Back port from the trunk + 2013-06-26 Michael Meissner + Pat Haugen + Peter Bergner + + * config/rs6000/power8.md: New. + * config/rs6000/rs6000-cpus.def (RS6000_CPU table): Adjust processor + setting for power8 entry. + * config/rs6000/t-rs6000 (MD_INCLUDES): Add power8.md. + * config/rs6000/rs6000.c (is_microcoded_insn, is_cracked_insn): Adjust + test for Power4/Power5 only. + (insn_must_be_first_in_group, insn_must_be_last_in_group): Add Power8 + support. + (force_new_group): Adjust comment. + * config/rs6000/rs6000.md: Include power8.md. + + Back port from the trunk + 2013-06-14 Michael Meissner + + PR target/57615 + * config/rs6000/rs6000.md (mov_ppc64): Call + rs6000_output_move_128bit to handle emitting quad memory + operations. Set attribute length to 8 bytes. + + Back port from the trunk + 2013-06-13 Michael Meissner + + * config/rs6000/rs6000.c (rs6000_option_override_internal): Move + test for clearing quad memory on 32-bit later. + + Back port from the trunk + + 2013-06-12 Michael Meissner + Pat Haugen + Peter Bergner + + * config/rs6000/rs6000.c (emit_load_locked): Add support for + power8 byte, half-word, and quad-word atomic instructions. + (emit_store_conditional): Likewise. + (rs6000_expand_atomic_compare_and_swap): Likewise. + (rs6000_expand_atomic_op): Likewise. + + * config/rs6000/sync.md (larx): Add new modes for power8. + (stcx): Likewise. + (AINT): New mode iterator to include TImode as well as normal + integer modes on power8. 
+ (fetchop_pred): Use int_reg_operand instead of gpc_reg_operand so + that VSX registers are not considered. Use AINT mode iterator + instead of INT1 to allow inclusion of quad word atomic operations + on power8. + (load_locked): Likewise. + (store_conditional): Likewise. + (atomic_compare_and_swap): Likewise. + (atomic_exchange): Likewise. + (atomic_nand): Likewise. + (atomic_fetch_): Likewise. + (atomic_nand_fetch): Likewise. + (mem_thread_fence): Use gen_loadsync_ instead of enumerating + each type. + (ATOMIC): On power8, add QImode, HImode modes. + (load_locked_si): Variants of load_locked for QI/HI + modes that promote to SImode. + (load_lockedti): Convert TImode arguments to PTImode, so that we + get a guaranteed even/odd register pair. + (load_lockedpti): Likewise. + (store_conditionalti): Likewise. + (store_conditionalpti): Likewise. + + * config/rs6000/rs6000.md (QHI): New mode iterator for power8 + atomic load/store instructions. + (HSI): Likewise. + + Back port from the trunk + + 2013-06-10 Michael Meissner + Pat Haugen + Peter Bergner + + * config/rs6000/vector.md (GPR move splitter): Do not split moves + of vectors in GPRS if they are direct moves or quad word load or + store moves. + + * config/rs6000/rs6000-protos.h (rs6000_output_move_128bit): Add + declaration. + (direct_move_p): Likewise. + (quad_load_store_p): Likewise. + + * config/rs6000/rs6000.c (enum rs6000_reg_type): Simplify register + classes into bins based on the physical register type. + (reg_class_to_reg_type): Likewise. + (IS_STD_REG_TYPE): Likewise. + (IS_FP_VECT_REG_TYPE): Likewise. + (reload_fpr_gpr): Arrays to determine what insn to use if we can + use direct move instructions. + (reload_gpr_vsx): Likewise. + (reload_vsx_gpr): Likewise. + (rs6000_init_hard_regno_mode_ok): Precalculate the register type + information that is a simplification of register classes. Also + precalculate direct move reload helpers.
+ (direct_move_p): New function to return true if the operation can + be done as a direct move instruction. + (quad_load_store_p): New function to return true if the operation + is a quad memory operation. + (rs6000_legitimize_address): If quad memory, only allow register + indirect for TImode addresses. + (rs6000_legitimate_address_p): Likewise. + (enum reload_reg_type): Delete, replace with rs6000_reg_type. + (rs6000_reload_register_type): Likewise. + (register_to_reg_type): Return register type. + (rs6000_secondary_reload_simple_move): New helper function for + secondary reload and secondary memory needed to identify anything + that is a simple move, and does not need reloading. + (rs6000_secondary_reload_direct_move): New helper function for + secondary reload to identify cases that can be done with several + instructions via the direct move instructions. + (rs6000_secondary_reload_move): New helper function for secondary + reload to identify moves between register types that can be done. + (rs6000_secondary_reload): Add support for quad memory operations + and for direct move. + (rs6000_secondary_memory_needed): Likewise. + (rs6000_debug_secondary_memory_needed): Change argument names. + (rs6000_output_move_128bit): New function to return the move to + use for 128-bit moves, including knowing about the various + limitations of quad memory operations. + + * config/rs6000/vsx.md (vsx_mov): Add support for quad + memory operations. Call rs6000_output_move_128bit for the actual + instruction(s) to generate. + (vsx_movti_64bit): Likewise. + + * config/rs6000/rs6000.md (UNSPEC_P8V_FMRGOW): New unspec values. + (UNSPEC_P8V_MTVSRWZ): Likewise. + (UNSPEC_P8V_RELOAD_FROM_GPR): Likewise. + (UNSPEC_P8V_MTVSRD): Likewise. + (UNSPEC_P8V_XXPERMDI): Likewise. + (UNSPEC_P8V_RELOAD_FROM_VSX): Likewise. + (UNSPEC_FUSION_GPR): Likewise. + (FMOVE128_GPR): New iterator for direct move. + (f32_lv): New mode attribute for load/store of SFmode/SDmode + values. + (f32_sv): Likewise.
+ (f32_dm): Likewise. + (zero_extenddi2_internal1): Add support for power8 32-bit + loads and direct move instructions. + (zero_extendsidi2_lfiwzx): Likewise. + (extendsidi2_lfiwax): Likewise. + (extendsidi2_nocell): Likewise. + (floatsi2_lfiwax): Likewise. + (lfiwax): Likewise. + (floatunssi2_lfiwzx): Likewise. + (lfiwzx): Likewise. + (fix_trunc_stfiwx): Likewise. + (fixuns_trunc_stfiwx): Likewise. + (mov_hardfloat, 32-bit floating point): Likewise. + (mov_hardfloat64, 64-bit floating point): Likewise. + (parity2_cmpb): Set length/type attr. + (unnamed shift right patterns, mov_internal2): Change type attr + for 'mr.' to fast_compare. + (bpermd_): Change type attr to popcnt. + (p8_fmrgow_): New insns for power8 direct move support. + (p8_mtvsrwz_1): Likewise. + (p8_mtvsrwz_2): Likewise. + (reload_fpr_from_gpr): Likewise. + (p8_mtvsrd_1): Likewise. + (p8_mtvsrd_2): Likewise. + (p8_xxpermdi_): Likewise. + (reload_vsx_from_gpr): Likewise. + (reload_vsx_from_gprsf): Likewise. + (p8_mfvsrd_3_): Likewise. + (reload_gpr_from_vsx): Likewise. + (reload_gpr_from_vsxsf): Likewise. + (p8_mfvsrd_4_disf): Likewise. + (multi-word GPR splits): Do not split direct moves or quad memory + operations. + + Backport from the trunk + + 2013-06-06 Michael Meissner + Pat Haugen + Peter Bergner + + * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): + Document new power8 builtins. + + * config/rs6000/vector.md (and3): Add a clobber/scratch of a + condition code register, to allow 128-bit logical operations to be + done in the VSX or GPR registers. + (nor3): Use the canonical form for nor. + (eqv3): Add expanders for power8 xxleqv, xxlnand, xxlorc, + vclz*, and vpopcnt* vector instructions. + (nand3): Likewise. + (orc3): Likewise. + (clz2): Likewise. + (popcount2): Likewise. + + * config/rs6000/predicates.md (int_reg_operand): Rework tests so + that only the GPRs are recognized. + + * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add + support for new power8 builtins.
+ + * config/rs6000/rs6000-builtin.def (xscvspdpn): Add new power8 + builtin functions. + (xscvdpspn): Likewise. + (vclz): Likewise. + (vclzb): Likewise. + (vclzh): Likewise. + (vclzw): Likewise. + (vclzd): Likewise. + (vpopcnt): Likewise. + (vpopcntb): Likewise. + (vpopcnth): Likewise. + (vpopcntw): Likewise. + (vpopcntd): Likewise. + (vgbbd): Likewise. + (vmrgew): Likewise. + (vmrgow): Likewise. + (eqv): Likewise. + (eqv_v16qi3): Likewise. + (eqv_v8hi3): Likewise. + (eqv_v4si3): Likewise. + (eqv_v2di3): Likewise. + (eqv_v4sf3): Likewise. + (eqv_v2df3): Likewise. + (nand): Likewise. + (nand_v16qi3): Likewise. + (nand_v8hi3): Likewise. + (nand_v4si3): Likewise. + (nand_v2di3): Likewise. + (nand_v4sf3): Likewise. + (nand_v2df3): Likewise. + (orc): Likewise. + (orc_v16qi3): Likewise. + (orc_v8hi3): Likewise. + (orc_v4si3): Likewise. + (orc_v2di3): Likewise. + (orc_v4sf3): Likewise. + (orc_v2df3): Likewise. + + * config/rs6000/rs6000.c (rs6000_option_override_internal): Only + allow power8 quad mode in 64-bit. + (rs6000_builtin_vectorized_function): Add support to vectorize + ISA 2.07 count leading zeros, population count builtins. + (rs6000_expand_vector_init): On ISA 2.07 use xscvdpspn to form + V4SF vectors instead of xscvdpsp to avoid IEEE related traps. + (builtin_function_type): Add vgbbd builtin function which takes an + unsigned argument. + (altivec_expand_vec_perm_const): Add support for new power8 merge + instructions. + + * config/rs6000/vsx.md (VSX_L2): New iterator for 128-bit types, + that does not include TImode for use with 32-bit. + (UNSPEC_VSX_CVSPDPN): Support for power8 xscvdpspn and xscvspdpn + instructions. + (UNSPEC_VSX_CVDPSPN): Likewise. + (vsx_xscvdpspn): Likewise. + (vsx_xscvspdpn): Likewise. + (vsx_xscvdpspn_scalar): Likewise. + (vsx_xscvspdpn_directmove): Likewise. + (vsx_and3): Split logical operations into 32-bit and + 64-bit. Add support to do logical operations on TImode as well as + VSX vector types.
Allow logical operations to be done in either + VSX registers or in general purpose registers in 64-bit mode. Add + splitters if GPRs were used. For AND, add clobber of CCmode to + allow use of ANDI on GPRs. Rewrite nor to use the canonical RTL + encoding. + (vsx_and3_32bit): Likewise. + (vsx_and3_64bit): Likewise. + (vsx_ior3): Likewise. + (vsx_ior3_32bit): Likewise. + (vsx_ior3_64bit): Likewise. + (vsx_xor3): Likewise. + (vsx_xor3_32bit): Likewise. + (vsx_xor3_64bit): Likewise. + (vsx_one_cmpl2): Likewise. + (vsx_one_cmpl2_32bit): Likewise. + (vsx_one_cmpl2_64bit): Likewise. + (vsx_nor3): Likewise. + (vsx_nor3_32bit): Likewise. + (vsx_nor3_64bit): Likewise. + (vsx_andc3): Likewise. + (vsx_andc3_32bit): Likewise. + (vsx_andc3_64bit): Likewise. + (vsx_eqv3_32bit): Add support for power8 xxleqv, xxlnand, + and xxlorc instructions. + (vsx_eqv3_64bit): Likewise. + (vsx_nand3_32bit): Likewise. + (vsx_nand3_64bit): Likewise. + (vsx_orc3_32bit): Likewise. + (vsx_orc3_64bit): Likewise. + + * config/rs6000/rs6000.h (VLOGICAL_REGNO_P): Update comment. + + * config/rs6000/altivec.md (UNSPEC_VGBBD): Add power8 vgbbd + instruction. + (p8_vmrgew): Add power8 vmrgew and vmrgow instructions. + (p8_vmrgow): Likewise. + (altivec_and3): Add clobber of CCmode to allow AND using + GPRs to be split under VSX. + (p8v_clz2): Add power8 count leading zero support. + (p8v_popcount2): Add power8 population count support. + (p8v_vgbbd): Add power8 gather bits by bytes by doubleword + support. + + * config/rs6000/rs6000.md (eqv3): Add support for power8 eqv + instruction. + + * config/rs6000/altivec.h (vec_eqv): Add defines to export power8 + builtin functions. + (vec_nand): Likewise. + (vec_vclz): Likewise. + (vec_vclzb): Likewise. + (vec_vclzd): Likewise. + (vec_vclzh): Likewise. + (vec_vclzw): Likewise. + (vec_vgbbd): Likewise. + (vec_vmrgew): Likewise. + (vec_vmrgow): Likewise. + (vec_vpopcnt): Likewise. + (vec_vpopcntb): Likewise. + (vec_vpopcntd): Likewise. + (vec_vpopcnth): Likewise.
+ (vec_vpopcntw): Likewise. + + Backport from trunk + + 2013-05-29 Michael Meissner + Pat Haugen + Peter Bergner + + * config/rs6000/vector.md (VEC_I): Add support for new power8 V2DI + instructions. + (VEC_A): Likewise. + (VEC_C): Likewise. + (vrotl3): Likewise. + (vashl3): Likewise. + (vlshr3): Likewise. + (vashr3): Likewise. + + * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add + support for power8 V2DI builtins. + + * config/rs6000/rs6000-builtin.def (abs_v2di): Add support for + power8 V2DI builtins. + (vupkhsw): Likewise. + (vupklsw): Likewise. + (vaddudm): Likewise. + (vminsd): Likewise. + (vmaxsd): Likewise. + (vminud): Likewise. + (vmaxud): Likewise. + (vpkudum): Likewise. + (vpksdss): Likewise. + (vpkudus): Likewise. + (vpksdus): Likewise. + (vrld): Likewise. + (vsld): Likewise. + (vsrd): Likewise. + (vsrad): Likewise. + (vsubudm): Likewise. + (vcmpequd): Likewise. + (vcmpgtsd): Likewise. + (vcmpgtud): Likewise. + (vcmpequd_p): Likewise. + (vcmpgtsd_p): Likewise. + (vcmpgtud_p): Likewise. + (vupkhsw): Likewise. + (vupklsw): Likewise. + (vaddudm): Likewise. + (vmaxsd): Likewise. + (vmaxud): Likewise. + (vminsd): Likewise. + (vminud): Likewise. + (vpksdss): Likewise. + (vpksdus): Likewise. + (vpkudum): Likewise. + (vpkudus): Likewise. + (vrld): Likewise. + (vsld): Likewise. + (vsrad): Likewise. + (vsrd): Likewise. + (vsubudm): Likewise. + + * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Add + support for power8 V2DI instructions. + + * config/rs6000/altivec.md (UNSPEC_VPKUHUM): Add support for + power8 V2DI instructions. Combine pack and unpack insns to use an + iterator for each mode. Check whether a particular mode supports + Altivec instructions instead of just checking TARGET_ALTIVEC. + (UNSPEC_VPKUWUM): Likewise. + (UNSPEC_VPKSHSS): Likewise. + (UNSPEC_VPKSWSS): Likewise. + (UNSPEC_VPKUHUS): Likewise. + (UNSPEC_VPKSHUS): Likewise. + (UNSPEC_VPKUWUS): Likewise. + (UNSPEC_VPKSWUS): Likewise. 
+ (UNSPEC_VPACK_SIGN_SIGN_SAT): Likewise. + (UNSPEC_VPACK_SIGN_UNS_SAT): Likewise. + (UNSPEC_VPACK_UNS_UNS_SAT): Likewise. + (UNSPEC_VPACK_UNS_UNS_MOD): Likewise. + (UNSPEC_VUPKHSB): Likewise. + (UNSPEC_VUNPACK_HI_SIGN): Likewise. + (UNSPEC_VUNPACK_LO_SIGN): Likewise. + (UNSPEC_VUPKHSH): Likewise. + (UNSPEC_VUPKLSB): Likewise. + (UNSPEC_VUPKLSH): Likewise. + (VI2): Likewise. + (VI_char): Likewise. + (VI_scalar): Likewise. + (VI_unit): Likewise. + (VP): Likewise. + (VP_small): Likewise. + (VP_small_lc): Likewise. + (VU_char): Likewise. + (add3): Likewise. + (altivec_vaddcuw): Likewise. + (altivec_vaddus): Likewise. + (altivec_vaddss): Likewise. + (sub3): Likewise. + (altivec_vsubcuw): Likewise. + (altivec_vsubus): Likewise. + (altivec_vsubss): Likewise. + (altivec_vavgs): Likewise. + (altivec_vcmpbfp): Likewise. + (altivec_eq): Likewise. + (altivec_gt): Likewise. + (altivec_gtu): Likewise. + (umax3): Likewise. + (smax3): Likewise. + (umin3): Likewise. + (smin3): Likewise. + (altivec_vpkuhum): Likewise. + (altivec_vpkuwum): Likewise. + (altivec_vpkshss): Likewise. + (altivec_vpkswss): Likewise. + (altivec_vpkuhus): Likewise. + (altivec_vpkshus): Likewise. + (altivec_vpkuwus): Likewise. + (altivec_vpkswus): Likewise. + (altivec_vpksss): Likewise. + (altivec_vpksus): Likewise. + (altivec_vpkuus): Likewise. + (altivec_vpkuum): Likewise. + (altivec_vrl): Likewise. + (altivec_vsl): Likewise. + (altivec_vsr): Likewise. + (altivec_vsra): Likewise. + (altivec_vsldoi_): Likewise. + (altivec_vupkhsb): Likewise. + (altivec_vupkhs): Likewise. + (altivec_vupkls): Likewise. + (altivec_vupkhsh): Likewise. + (altivec_vupklsb): Likewise. + (altivec_vupklsh): Likewise. + (altivec_vcmpequ_p): Likewise. + (altivec_vcmpgts_p): Likewise. + (altivec_vcmpgtu_p): Likewise. + (abs2): Likewise. + (vec_unpacks_hi_v16qi): Likewise. + (vec_unpacks_hi_v8hi): Likewise. + (vec_unpacks_lo_v16qi): Likewise. + (vec_unpacks_hi_): Likewise. + (vec_unpacks_lo_v8hi): Likewise. 
+ (vec_unpacks_lo_): Likewise. + (vec_pack_trunc_v8hi): Likewise. + (vec_pack_trunc_v4si): Likewise. + (vec_pack_trunc_): Likewise. + + * config/rs6000/altivec.h (vec_vaddudm): Add defines for power8 + V2DI builtins. + (vec_vmaxsd): Likewise. + (vec_vmaxud): Likewise. + (vec_vminsd): Likewise. + (vec_vminud): Likewise. + (vec_vpksdss): Likewise. + (vec_vpksdus): Likewise. + (vec_vpkudum): Likewise. + (vec_vpkudus): Likewise. + (vec_vrld): Likewise. + (vec_vsld): Likewise. + (vec_vsrad): Likewise. + (vec_vsrd): Likewise. + (vec_vsubudm): Likewise. + (vec_vupkhsw): Likewise. + (vec_vupklsw): Likewise. + + 2013-05-22 Michael Meissner + Pat Haugen + Peter Bergner + + * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Add + documentation for the power8 crypto builtins. + + * config/rs6000/t-rs6000 (MD_INCLUDES): Add crypto.md. + + * config/rs6000/rs6000-builtin.def (BU_P8V_AV_1): Add support + macros for defining power8 builtin functions. + (BU_P8V_AV_2): Likewise. + (BU_P8V_AV_P): Likewise. + (BU_P8V_VSX_1): Likewise. + (BU_P8V_OVERLOAD_1): Likewise. + (BU_P8V_OVERLOAD_2): Likewise. + (BU_CRYPTO_1): Likewise. + (BU_CRYPTO_2): Likewise. + (BU_CRYPTO_3): Likewise. + (BU_CRYPTO_OVERLOAD_1): Likewise. + (BU_CRYPTO_OVERLOAD_2): Likewise. + (XSCVSPDP): Fix typo, point to the correct instruction. + (VCIPHER): Add power8 crypto builtins. + (VCIPHERLAST): Likewise. + (VNCIPHER): Likewise. + (VNCIPHERLAST): Likewise. + (VPMSUMB): Likewise. + (VPMSUMH): Likewise. + (VPMSUMW): Likewise. + (VPERMXOR_V2DI): Likewise. + (VPERMXOR_V4SI): Likewise. + (VPERMXOR_V8HI): Likewise. + (VPERMXOR_V16QI): Likewise. + (VSHASIGMAW): Likewise. + (VSHASIGMAD): Likewise. + (VPMSUM): Likewise. + (VPERMXOR): Likewise. + (VSHASIGMA): Likewise. + + * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define + __CRYPTO__ if the crypto instructions are available. + (altivec_overloaded_builtins): Add support for overloaded power8 + builtins.
+ + * config/rs6000/rs6000.c (rs6000_expand_ternop_builtin): Add + support for power8 crypto builtins. + (builtin_function_type): Likewise. + (altivec_init_builtins): Add support for builtins that take vector + long long (V2DI) arguments. + + * config/rs6000/crypto.md: New file, define power8 crypto + instructions. + + 2013-05-22 Michael Meissner + Pat Haugen + Peter Bergner + + * doc/invoke.texi (Option Summary): Add power8 options. + (RS/6000 and PowerPC Options): Likewise. + + * doc/md.texi (PowerPC and IBM RS6000 constraints): Update to use + constraints.md instead of rs6000.h. Reorder w* constraints. Add + wm, wn, wr documentation. + + * gcc/config/rs6000/constraints.md (wm): New constraint for VSX + registers if direct move instructions are enabled. + (wn): New constraint for no registers. + (wq): New constraint for quad word even GPR registers. + (wr): New constraint if 64-bit instructions are enabled. + (wv): New constraint if power8 vector instructions are enabled. + (wQ): New constraint for quad word memory locations. + + * gcc/config/rs6000/predicates.md (const_0_to_15_operand): New + constraint for 0..15 for crypto instructions. + (gpc_reg_operand): If VSX allow registers in VSX registers as well + as GPR and floating point registers. + (int_reg_operand): New predicate to match only GPR registers. + (base_reg_operand): New predicate to match base registers. + (quad_int_reg_operand): New predicate to match even GPR registers + for quad memory operations. + (vsx_reg_or_cint_operand): New predicate to allow vector logical + operations in both GPR and VSX registers. + (quad_memory_operand): New predicate for quad memory operations. + (reg_or_indexed_operand): New predicate for direct move support. + + * gcc/config/rs6000/rs6000-cpus.def (ISA_2_5_MASKS_EMBEDDED): + Inherit from ISA_2_4_MASKS, not ISA_2_2_MASKS. + (ISA_2_7_MASKS_SERVER): New mask for ISA 2.07 (i.e. power8). + (POWERPC_MASKS): Add power8 options. 
+ (power8 cpu): Use ISA_2_7_MASKS_SERVER instead of specifying the + various options. + + * gcc/config/rs6000/rs6000-c.c (rs6000_target_modify_macros): + Define _ARCH_PWR8 and __POWER8_VECTOR__ for power8. + + * gcc/config/rs6000/rs6000.opt (-mvsx-timode): Add documentation. + (-mpower8-fusion): New power8 options. + (-mpower8-fusion-sign): Likewise. + (-mpower8-vector): Likewise. + (-mcrypto): Likewise. + (-mdirect-move): Likewise. + (-mquad-memory): Likewise. + + * gcc/config/rs6000/rs6000.c (power8_cost): Initial definition for + power8. + (rs6000_hard_regno_mode_ok): Make PTImode only match even GPR + registers. + (rs6000_debug_reg_print): Print the base register class if + -mdebug=reg. + (rs6000_debug_vector_unit): Add p8_vector. + (rs6000_debug_reg_global): If -mdebug=reg, print power8 constraint + definitions. Also print fusion state. + (rs6000_init_hard_regno_mode_ok): Set up power8 constraints. + (rs6000_builtin_mask_calculate): Add power8 builtin support. + (rs6000_option_override_internal): Add support for power8. + (rs6000_common_init_builtins): Add debugging for skipped builtins + if -mdebug=builtin. + (rs6000_adjust_cost): Add power8 support. + (rs6000_issue_rate): Likewise. + (insn_must_be_first_in_group): Likewise. + (insn_must_be_last_in_group): Likewise. + (force_new_group): Likewise. + (rs6000_register_move_cost): Likewise. + (rs6000_opt_masks): Likewise. + + * config/rs6000/rs6000.h (ASM_CPU_POWER8_SPEC): If we don't have a + power8 capable assembler, default to power7 options. + (TARGET_DIRECT_MOVE): Likewise. + (TARGET_CRYPTO): Likewise. + (TARGET_P8_VECTOR): Likewise. + (VECTOR_UNIT_P8_VECTOR_P): Define power8 vector support. + (VECTOR_UNIT_VSX_OR_P8_VECTOR_P): Likewise. + (VECTOR_MEM_P8_VECTOR_P): Likewise. + (VECTOR_MEM_VSX_OR_P8_VECTOR_P): Likewise. + (VECTOR_MEM_ALTIVEC_OR_VSX_P): Likewise. + (TARGET_XSCVDPSPN): Likewise. + (TARGET_XSCVSPDPN): Likewise. + (TARGET_SYNC_HI_QI): Likewise. + (TARGET_SYNC_TI): Likewise.
+ (MASK_CRYPTO): Likewise. + (MASK_DIRECT_MOVE): Likewise. + (MASK_P8_FUSION): Likewise. + (MASK_P8_VECTOR): Likewise. + (REG_ALLOC_ORDER): Move fr13 to be lower in priority so that the + TFmode temporary used by some of the direct move instructions to + get two FP temporary registers does not force creation of a stack + frame. + (VLOGICAL_REGNO_P): Allow vector logical operations in GPRs. + (MODES_TIEABLE_P): Move the VSX tests above the Altivec tests so + that any VSX registers are tieable, even if they are also an + Altivec vector mode. + (r6000_reg_class_enum): Add wm, wr, wv constraints. + (RS6000_BTM_P8_VECTOR): Power8 builtin support. + (RS6000_BTM_CRYPTO): Likewise. + (RS6000_BTM_COMMON): Likewise. + + * config/rs6000/rs6000.md (cpu attribute): Add power8. + * config/rs6000/rs6000-opts.h (PROCESSOR_POWER8): Likewise. + (enum rs6000_vector): Add power8 vector support. + + + Backport from mainline + 2013-03-20 Pat Haugen + + * config/rs6000/predicates.md (indexed_address, update_address_mem + update_indexed_address_mem): New predicates. + * config/rs6000/vsx.md (vsx_extract__zero): Set correct "type" + attribute for load/store instructions. + * config/rs6000/dfp.md (movsd_store): Likewise. + (movsd_load): Likewise. + * config/rs6000/rs6000.md (zero_extenddi2_internal1): Likewise. + (unnamed HI->DI extend define_insn): Likewise. + (unnamed SI->DI extend define_insn): Likewise. + (unnamed QI->SI extend define_insn): Likewise. + (unnamed QI->HI extend define_insn): Likewise. + (unnamed HI->SI extend define_insn): Likewise. + (unnamed HI->SI extend define_insn): Likewise. + (extendsfdf2_fpr): Likewise. + (movsi_internal1): Likewise. + (movsi_internal1_single): Likewise. + (movhi_internal): Likewise. + (movqi_internal): Likewise. + (movcc_internal1): Correct mnemonic for stw insn. Set correct "type" + attribute for load/store instructions. + (mov_hardfloat): Set correct "type" attribute for load/store + instructions. + (mov_softfloat): Likewise. 
+ (mov_hardfloat32): Likewise. + (mov_hardfloat64): Likewise. + (mov_softfloat64): Likewise. + (movdi_internal32): Likewise. + (movdi_internal64): Likewise. + (probe_stack_): Likewise. + + Backport from mainline + 2013-03-20 Michael Meissner + + * config/rs6000/vector.md (VEC_R): Add 32-bit integer, binary + floating point, and decimal floating point to reload iterator. + + * config/rs6000/constraints.md (wl constraint): New constraints to + return FLOAT_REGS if certain options are used to reduce the number + of separate patterns that exist in the file. + (wx constraint): Likewise. + (wz constraint): Likewise. + + * config/rs6000/rs6000.c (rs6000_debug_reg_global): If + -mdebug=reg, print wg, wl, wx, and wz constraints. + (rs6000_init_hard_regno_mode_ok): Initialize new constraints. + Initialize the reload functions for 64-bit binary/decimal floating + point types. + (reg_offset_addressing_ok_p): If we are on a power7 or later, use + LFIWZX and STFIWX to load/store 32-bit decimal types, and don't + create the buffer on the stack to overcome not having a 32-bit + load and store. + (rs6000_emit_move): Likewise. + (rs6000_secondary_memory_needed_rtx): Likewise. + (rs6000_alloc_sdmode_stack_slot): Likewise. + (rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f + via xxlxor, just like DFmode 0.0. + + * config/rs6000/rs6000.h (TARGET_NO_SDMODE_STACK): New macro) + (define as 1 if we are running on a power7 or newer. + (enum r6000_reg_class_enum): Add new constraints. + + * config/rs6000/dfp.md (movsd): Delete, combine with binary + floating point moves in rs6000.md. Combine power6x (mfpgpr) moves + with other moves by using conditional constraints (wg). Use LFIWZX + and STFIWX for loading SDmode on power7. Use xxlxor to create + 0.0f. + (movsd splitter): Likewise. + (movsd_hardfloat): Likewise. + (movsd_softfloat): Likewise. + + * config/rs6000/rs6000.md (FMOVE32): New iterators to combine + binary and decimal floating point moves. 
+ (fmove_ok): New attributes to combine binary and decimal floating + point moves, and to combine power6x (mfpgpr) moves along normal + floating moves. + (real_value_to_target): Likewise. + (f32_lr): Likewise. + (f32_lm): Likewise. + (f32_li): Likewise. + (f32_sr): Likewise. + (f32_sm): Likewise. + (f32_si): Likewise. + (movsf): Combine binary and decimal floating point moves. Combine + power6x (mfpgpr) moves with other moves by using conditional + constraints (wg). Use LFIWZX and STFIWX for loading SDmode on + power7. + (mov for SFmode/SDmode); Likewise. + (SFmode/SDmode splitters): Likewise. + (movsf_hardfloat): Likewise. + (mov_hardfloat for SFmode/SDmode): Likewise. + (movsf_softfloat): Likewise. + (mov_softfloat for SFmode/SDmode): Likewise. + + * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wl) + (wx and wz constraints. + + * config/rs6000/constraints.md (wg constraint): New constraint to + return FLOAT_REGS if -mmfpgpr (power6x) was used. + + * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add wg + constraint. + + * config/rs6000/rs6000.c (rs6000_debug_reg_global): If + -mdebug=reg, print wg, wl, wx, and wz constraints. + (rs6000_init_hard_regno_mode_ok): Initialize new constraints. + Initialize the reload functions for 64-bit binary/decimal floating + point types. + (reg_offset_addressing_ok_p): If we are on a power7 or later, use + LFIWZX and STFIWX to load/store 32-bit decimal types, and don't + create the buffer on the stack to overcome not having a 32-bit + load and store. + (rs6000_emit_move): Likewise. + (rs6000_secondary_memory_needed_rtx): Likewise. + (rs6000_alloc_sdmode_stack_slot): Likewise. + (rs6000_preferred_reload_class): On VSX, we can create SFmode 0.0f + via xxlxor, just like DFmode 0.0. + + + * config/rs6000/dfp.md (movdd): Delete, combine with binary + floating point moves in rs6000.md. Combine power6x (mfpgpr) moves + with other moves by using conditional constraints (wg). 
Use LFIWZX + and STFIWX for loading SDmode on power7. + (movdd splitters): Likewise. + (movdd_hardfloat32): Likewise. + (movdd_softfloat32): Likewise. + (movdd_hardfloat64_mfpgpr): Likewise. + (movdd_hardfloat64): Likewise. + (movdd_softfloat64): Likewise. + + * config/rs6000/rs6000.md (FMOVE64): New iterators to combine + 64-bit binary and decimal floating point moves. + (FMOVE64X): Likewise. + (movdf): Combine 64-bit binary and decimal floating point moves. + Combine power6x (mfpgpr) moves with other moves by using + conditional constraints (wg). + (mov for DFmode/DDmode): Likewise. + (DFmode/DDmode splitters): Likewise. + (movdf_hardfloat32): Likewise. + (mov_hardfloat32 for DFmode/DDmode): Likewise. + (movdf_softfloat32): Likewise. + (movdf_hardfloat64_mfpgpr): Likewise. + (movdf_hardfloat64): Likewise. + (mov_hardfloat64 for DFmode/DDmode): Likewise. + (movdf_softfloat64): Likewise. + (mov_softfloat64 for DFmode/DDmode): Likewise. + (reload__load): Move to later in the file so they aren't in + the middle of the floating point move insns. + (reload__store): Likewise. + + * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wg + constraint. + + * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print out wg + constraint if -mdebug=reg. + (rs6000_init_hard_regno_mode_ok): Enable wg constraint if + -mfpgpr. Enable using dd reload support if needed. + + * config/rs6000/dfp.md (movtd): Delete, combine with 128-bit + binary and decimal floating point moves in rs6000.md. + (movtd_internal): Likewise. + + * config/rs6000/rs6000.md (FMOVE128): Combine 128-bit binary and + decimal floating point moves. + (movtf): Likewise. + (movtf_internal): Likewise. + (mov_internal, TDmode/TFmode): Likewise. + (movtf_softfloat): Likewise. + (mov_softfloat, TDmode/TFmode): Likewise. + + * config/rs6000/rs6000.md (movdi_mfpgpr): Delete, combine with + movdi_internal64, using wg constraint for move direct operations. + (movdi_internal64): Likewise. 
+ + * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print + MODES_TIEABLE_P for selected modes. Print the numerical value of + the various virtual registers. Use GPR/FPR first/last values) + (instead of hard coding the register numbers. Print which modes + have reload functions registered. + (rs6000_option_override_internal): If -mdebug=reg, trace the + options settings before/after setting cpu, target and subtarget + settings. + (rs6000_secondary_reload_trace): Improve the RTL dump for + -mdebug=addr and for secondary reload failures in + rs6000_secondary_reload_inner. + (rs6000_secondary_reload_fail): Likewise. + (rs6000_secondary_reload_inner): Likewise. + + * config/rs6000/rs6000.md (FIRST_GPR_REGNO): Add convenience + macros for first/last GPR and FPR registers. + (LAST_GPR_REGNO): Likewise. + (FIRST_FPR_REGNO): Likewise. + (LAST_FPR_REGNO): Likewise. + + * config/rs6000/vector.md (mul3): Use the combined macro + VECTOR_UNIT_ALTIVEC_OR_VSX_P instead of separate calls to + VECTOR_UNIT_ALTIVEC_P and VECTOR_UNIT_VSX_P. + (vcond): Likewise. + (vcondu): Likewise. + (vector_gtu): Likewise. + (vector_gte): Likewise. + (xor3): Don't allow logical operations on TImode in 32-bit + to prevent the compiler from converting DImode operations to + TImode. + (ior3): Likewise. + (and3): Likewise. + (one_cmpl2): Likewise. + (nor3): Likewise. + (andc3): Likewise. + + * config/rs6000/constraints.md (wt constraint): New constraint + that returns VSX_REGS if TImode is allowed in VSX registers. + + * config/rs6000/predicates.md (easy_fp_constant): 0.0f is an easy + constant under VSX. + + * config/rs6000/rs6000-modes.def (PTImode): Define, PTImode is + similar to TImode, but it is restricted to being in the GPRs. + + * config/rs6000/rs6000.opt (-mvsx-timode): New switch to allow + TImode to occupy a single VSX register. + + * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Default to + -mvsx-timode for power7/power8. + (power7 cpu): Likewise. + (power8 cpu): Likewise. 
+ + * config/rs6000/rs6000.c (rs6000_hard_regno_nregs_internal): Make + sure that TFmode/TDmode take up two registers if they are ever + allowed in the upper VSX registers. + (rs6000_hard_regno_mode_ok): If -mvsx-timode, allow TImode in VSX + registers. + (rs6000_init_hard_regno_mode_ok): Likewise. + (rs6000_debug_reg_global): Add debugging for PTImode and wt + constraint. Print if LRA is turned on. + (rs6000_option_override_internal): Give an error if -mvsx-timode + and VSX is not enabled. + (invalid_e500_subreg): Handle PTImode, restricting it to GPRs. If + -mvsx-timode, restrict TImode to reg+reg addressing, and PTImode + to reg+offset addressing. Use PTImode when checking offset + addresses for validity. + (reg_offset_addressing_ok_p): Likewise. + (rs6000_legitimate_offset_address_p): Likewise. + (rs6000_legitimize_address): Likewise. + (rs6000_legitimize_reload_address): Likewise. + (rs6000_legitimate_address_p): Likewise. + (rs6000_eliminate_indexed_memrefs): Likewise. + (rs6000_emit_move): Likewise. + (rs6000_secondary_reload): Likewise. + (rs6000_secondary_reload_inner): Handle PTImode. Allow 64-bit + reloads to fpr registers to continue to use reg+offset addressing) + (but 64-bit reloads to altivec registers need reg+reg addressing. + Drop test for PRE_MODIFY, since VSX loads/stores no longer support + it. Treat LO_SUM like a PLUS operation. + (rs6000_secondary_reload_class): If type is 64-bit, prefer to use + FLOAT_REGS instead of VSX_RGS to allow use of reg+offset + addressing. + (rs6000_cannot_change_mode_class): Do not allow TImode in VSX + registers to share a register with a smaller sized type, since VSX + puts scalars in the upper 64-bits. + (print_operand): Add support for PTImode. + (rs6000_register_move_cost): Use VECTOR_MEM_VSX_P instead of + VECTOR_UNIT_VSX_P to catch types that can be loaded in VSX + registers, but don't have arithmetic support. + (rs6000_memory_move_cost): Add test for VSX. + (rs6000_opt_masks): Add -mvsx-timode. 
+ + * config/rs6000/vsx.md (VSm): Change to use 64-bit aligned moves + for TImode. + (VSs): Likewise. + (VSr): Use wt constraint for TImode. + (VSv): Drop TImode support. + (vsx_movti): Delete, replace with versions for 32-bit and 64-bit. + (vsx_movti_64bit): Likewise. + (vsx_movti_32bit): Likewise. + (vec_store_): Use VSX iterator instead of vector iterator. + (vsx_and3): Delete use of '?' constraint on inputs, just put + one '?' on the appropriate output constraint. Do not allow TImode + logical operations on 32-bit systems. + (vsx_ior3): Likewise. + (vsx_xor3): Likewise. + (vsx_one_cmpl2): Likewise. + (vsx_nor3): Likewise. + (vsx_andc3): Likewise. + (vsx_concat_): Likewise. + (vsx_xxpermdi_): Fix thinko for non V2DF/V2DI modes. + + * config/rs6000/rs6000.h (MASK_VSX_TIMODE): Map from + OPTION_MASK_VSX_TIMODE. + (enum rs6000_reg_class_enum): Add RS6000_CONSTRAINT_wt. + (STACK_SAVEAREA_MODE): Use PTImode instead of TImode. + + * config/rs6000/rs6000.md (INT mode attribute): Add PTImode. + (TI2 iterator): New iterator for TImode, PTImode. + (wd mode attribute): Add values for vector types. + (movti_string): Replace TI move operations with operations for + TImode and PTImode. Add support for TImode being allowed in VSX + registers. + (mov_string, TImode/PTImode): Likewise. + (movti_ppc64): Likewise. + (mov_ppc64, TImode/PTImode): Likewise. + (TI mode splitters): Likewise. + + * doc/md.texi (PowerPC and IBM RS6000 constraints): Document wt + constraint. + 2014-04-04 Richard Biener * tree-ssanames.c (make_ssa_name_fn): Fix assert. diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index fd6d07f50ff7..4b91c5c5e242 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -321,6 +321,42 @@ #define vec_vsx_st __builtin_vec_vsx_st #endif +#ifdef _ARCH_PWR8 +/* Vector additions added in ISA 2.07. 
*/ +#define vec_eqv __builtin_vec_eqv +#define vec_nand __builtin_vec_nand +#define vec_orc __builtin_vec_orc +#define vec_vaddudm __builtin_vec_vaddudm +#define vec_vclz __builtin_vec_vclz +#define vec_vclzb __builtin_vec_vclzb +#define vec_vclzd __builtin_vec_vclzd +#define vec_vclzh __builtin_vec_vclzh +#define vec_vclzw __builtin_vec_vclzw +#define vec_vgbbd __builtin_vec_vgbbd +#define vec_vmaxsd __builtin_vec_vmaxsd +#define vec_vmaxud __builtin_vec_vmaxud +#define vec_vminsd __builtin_vec_vminsd +#define vec_vminud __builtin_vec_vminud +#define vec_vmrgew __builtin_vec_vmrgew +#define vec_vmrgow __builtin_vec_vmrgow +#define vec_vpksdss __builtin_vec_vpksdss +#define vec_vpksdus __builtin_vec_vpksdus +#define vec_vpkudum __builtin_vec_vpkudum +#define vec_vpkudus __builtin_vec_vpkudus +#define vec_vpopcnt __builtin_vec_vpopcnt +#define vec_vpopcntb __builtin_vec_vpopcntb +#define vec_vpopcntd __builtin_vec_vpopcntd +#define vec_vpopcnth __builtin_vec_vpopcnth +#define vec_vpopcntw __builtin_vec_vpopcntw +#define vec_vrld __builtin_vec_vrld +#define vec_vsld __builtin_vec_vsld +#define vec_vsrad __builtin_vec_vsrad +#define vec_vsrd __builtin_vec_vsrd +#define vec_vsubudm __builtin_vec_vsubudm +#define vec_vupkhsw __builtin_vec_vupkhsw +#define vec_vupklsw __builtin_vec_vupklsw +#endif + /* Predicates. For C++, we use templates in order to allow non-parenthesized arguments. 
For C, instead, we use macros since non-parenthesized arguments were diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 1b0b5c3fb132..db2e93a168a5 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -41,15 +41,11 @@ UNSPEC_VMULOSB UNSPEC_VMULOUH UNSPEC_VMULOSH - UNSPEC_VPKUHUM - UNSPEC_VPKUWUM UNSPEC_VPKPX - UNSPEC_VPKSHSS - UNSPEC_VPKSWSS - UNSPEC_VPKUHUS - UNSPEC_VPKSHUS - UNSPEC_VPKUWUS - UNSPEC_VPKSWUS + UNSPEC_VPACK_SIGN_SIGN_SAT + UNSPEC_VPACK_SIGN_UNS_SAT + UNSPEC_VPACK_UNS_UNS_SAT + UNSPEC_VPACK_UNS_UNS_MOD UNSPEC_VSLV4SI UNSPEC_VSLO UNSPEC_VSR @@ -71,12 +67,10 @@ UNSPEC_VLOGEFP UNSPEC_VEXPTEFP UNSPEC_VLSDOI - UNSPEC_VUPKHSB + UNSPEC_VUNPACK_HI_SIGN + UNSPEC_VUNPACK_LO_SIGN UNSPEC_VUPKHPX - UNSPEC_VUPKHSH - UNSPEC_VUPKLSB UNSPEC_VUPKLPX - UNSPEC_VUPKLSH UNSPEC_DST UNSPEC_DSTT UNSPEC_DSTST @@ -134,6 +128,7 @@ UNSPEC_VUPKLS_V4SF UNSPEC_VUPKHU_V4SF UNSPEC_VUPKLU_V4SF + UNSPEC_VGBBD ]) (define_c_enum "unspecv" @@ -146,6 +141,8 @@ ;; Vec int modes (define_mode_iterator VI [V4SI V8HI V16QI]) +;; Like VI, but add ISA 2.07 integer vector ops +(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) ;; Short vec in modes (define_mode_iterator VIshort [V8HI V16QI]) ;; Vec float modes @@ -159,8 +156,18 @@ ;; Like VM, except don't do TImode (define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI]) -(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")]) -(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) +(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") + (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") + (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")]) + +;; Vector pack/unpack +(define_mode_iterator VP [V2DI V4SI V8HI]) +(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI 
"V16QI")]) +(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) +(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) ;; Vector move instructions. (define_insn "*altivec_mov" @@ -378,10 +385,10 @@ ;; add (define_insn "add3" - [(set (match_operand:VI 0 "register_operand" "=v") - (plus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (plus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vaddum %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -398,17 +405,17 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VADDCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vaddcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "altivec_vaddus" [(set (match_operand:VI 0 "register_operand" "=v") (unspec:VI [(match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")] + (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDU)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "" "vaddus %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -418,16 +425,16 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDS)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (mode)" "vaddss %0,%1,%2" [(set_attr "type" "vecsimple")]) ;; sub (define_insn "sub3" - [(set (match_operand:VI 0 "register_operand" "=v") - (minus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (minus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vsubum %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -444,7 +451,7 @@ (unspec:V4SI 
[(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUBCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vsubcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -454,7 +461,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBU)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (mode)" "vsubus %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -464,7 +471,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBS)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (mode)" "vsubss %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -483,7 +490,7 @@ (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] UNSPEC_VAVGS))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (mode)" "vavgs %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -492,31 +499,31 @@ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v")] UNSPEC_VCMPBFP))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vcmpbfp %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_eq" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (eq:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "" "vcmpequ %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_gt" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gt:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gt:VI2 (match_operand:VI2 1 
"altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "" "vcmpgts %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_gtu" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "" "vcmpgtu %0,%1,%2" [(set_attr "type" "veccmp")]) @@ -744,18 +751,18 @@ ;; max (define_insn "umax3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vmaxu %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smax3" - [(set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vmaxs %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -768,18 +775,18 @@ [(set_attr "type" "veccmp")]) (define_insn "umin3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vminu %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smin3" - [(set (match_operand:VI 0 "register_operand" 
"=v") - (smin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vmins %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -935,6 +942,31 @@ "vmrglw %0,%1,%2" [(set_attr "type" "vecperm")]) +;; Power8 vector merge even/odd +(define_insn "p8_vmrgew" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_P8_VECTOR" + "vmrgew %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_vmrgow" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "TARGET_P8_VECTOR" + "vmrgow %0,%1,%2" + [(set_attr "type" "vecperm")]) + (define_insn "vec_widen_umult_even_v16qi" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") @@ -1008,74 +1040,7 @@ [(set_attr "type" "veccomplex")]) -;; logical ops. 
Have the logical ops follow the memory ops in -;; terms of whether to prefer VSX or Altivec - -(define_insn "*altivec_and3" - [(set (match_operand:VM 0 "register_operand" "=v") - (and:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vand %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_ior3" - [(set (match_operand:VM 0 "register_operand" "=v") - (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_xor3" - [(set (match_operand:VM 0 "register_operand" "=v") - (xor:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vxor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_one_cmpl2" - [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (match_operand:VM 1 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vnor %0,%1,%1" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_nor3" - [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v"))))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vnor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_andc3" - [(set (match_operand:VM 0 "register_operand" "=v") - (and:VM (not:VM (match_operand:VM 2 "register_operand" "v")) - (match_operand:VM 1 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (mode)" - "vandc %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "altivec_vpkuhum" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - "vpkuhum %0,%1,%2" - [(set_attr "type" 
"vecperm")]) - -(define_insn "altivec_vpkuwum" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - "vpkuwum %0,%1,%2" - [(set_attr "type" "vecperm")]) - +;; Vector pack/unpack (define_insn "altivec_vpkpx" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") @@ -1085,71 +1050,47 @@ "vpkpx %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshss" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshss %0,%1,%2" +(define_insn "altivec_vpksss" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_SIGN_SAT))] + "" + "vpksss %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkswss" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswss %0,%1,%2" +(define_insn "altivec_vpksus" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_UNS_SAT))] + "" + "vpksus %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkuhus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 
0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuhus %0,%1,%2" +(define_insn "altivec_vpkuus" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_SAT))] + "" + "vpkuus %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshus %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkuwus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuwus %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkswus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswus %0,%1,%2" +(define_insn "altivec_vpkuum" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "" + "vpkuum %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "*altivec_vrl" - [(set (match_operand:VI 0 "register_operand" "=v") - (rotate:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (rotate:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 
"register_operand" "v")))] + "" "vrl %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1172,26 +1113,26 @@ [(set_attr "type" "vecperm")]) (define_insn "*altivec_vsl" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashift:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashift:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vsl %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsr" - [(set (match_operand:VI 0 "register_operand" "=v") - (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vsr %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsra" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashiftrt:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "" "vsra %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1476,12 +1417,20 @@ "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - "vupkhsb %0,%1" +(define_insn "altivec_vupkhs" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand: 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN))] + "" + "vupkhs %0,%1" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vupkls" + [(set (match_operand:VP 0 "register_operand" 
"=v") + (unspec:VP [(match_operand: 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "" + "vupkls %0,%1" [(set_attr "type" "vecperm")]) (define_insn "altivec_vupkhpx" @@ -1492,22 +1441,6 @@ "vupkhpx %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - "vupkhsh %0,%1" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vupklsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - "vupklsb %0,%1" - [(set_attr "type" "vecperm")]) - (define_insn "altivec_vupklpx" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] @@ -1516,49 +1449,41 @@ "vupklpx %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupklsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - "vupklsh %0,%1" - [(set_attr "type" "vecperm")]) - ;; Compare vectors producing a vector result and a predicate, setting CR6 to ;; indicate a combined status (define_insn "*altivec_vcmpequ_p" [(set (reg:CC 74) - (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (eq:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (eq:VI2 (match_dup 1) + (match_dup 2)))] + "" "vcmpequ. 
%0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgts_p" [(set (reg:CC 74) - (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gt:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (gt:VI2 (match_dup 1) + (match_dup 2)))] + "" "vcmpgts. %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgtu_p" [(set (reg:CC 74) - (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gtu:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (gtu:VI2 (match_dup 1) + (match_dup 2)))] + "" "vcmpgtu. %0,%1,%2" [(set_attr "type" "veccmp")]) @@ -1779,20 +1704,28 @@ [(set_attr "type" "vecstore")]) ;; Generate -;; vspltis? SCRATCH0,0 +;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0 ;; vsubu?m SCRATCH2,SCRATCH1,%1 ;; vmaxs? 
%0,%1,SCRATCH2" (define_expand "abs2" - [(set (match_dup 2) (vec_duplicate:VI (const_int 0))) - (set (match_dup 3) - (minus:VI (match_dup 2) - (match_operand:VI 1 "register_operand" "v"))) - (set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_dup 1) (match_dup 3)))] - "TARGET_ALTIVEC" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) + (minus:VI2 (match_dup 2) + (match_operand:VI2 1 "register_operand" "v"))) + (set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_dup 1) (match_dup 4)))] + "" { - operands[2] = gen_reg_rtx (GET_MODE (operands[0])); - operands[3] = gen_reg_rtx (GET_MODE (operands[0])); + int i, n_elt = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (n_elt); + + /* Create an all 0 constant. */ + for (i = 0; i < n_elt; ++i) + RTVEC_ELT (v, i) = const0_rtx; + + operands[2] = gen_reg_rtx (mode); + operands[3] = gen_rtx_CONST_VECTOR (mode, v); + operands[4] = gen_reg_rtx (mode); }) ;; Generate @@ -1950,49 +1883,19 @@ DONE; }") -(define_expand "vec_unpacks_hi_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsb (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_hi_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsh (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_lo_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsb (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_hi_" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand: 1 "register_operand" "v")] + 
UNSPEC_VUNPACK_HI_SIGN))] + "" + "") -(define_expand "vec_unpacks_lo_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsh (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_lo_" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand: 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "" + "") (define_insn "vperm_v8hiv4si" [(set (match_operand:V4SI 0 "register_operand" "=v") @@ -2291,29 +2194,13 @@ DONE; }") -(define_expand "vec_pack_trunc_v8hi" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_pack_trunc_v4si" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2])); - DONE; -}") +(define_expand "vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "" + "") (define_expand "altivec_negv4sf2" [(use (match_operand:V4SF 0 "register_operand" "")) @@ -2460,3 +2347,34 @@ emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx)); DONE; }") + + +;; Power8 vector instructions encoded as Altivec instructions + +;; Vector count leading zeros +(define_insn "*p8v_clz2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vclz %0,%1" + [(set_attr 
"length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector population count +(define_insn "*p8v_popcount2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vpopcnt %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector Gather Bits by Bytes by Doubleword +(define_insn "p8v_vgbbd" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VGBBD))] + "TARGET_P8_VECTOR" + "vgbbd %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index ccb61edce45b..4467b9e3d8bf 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -52,21 +52,61 @@ "@internal") ;; Use w as a prefix to add VSX modes -;; vector double (V2DF) +;; any VSX register +(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]" + "Any VSX register if the -mvsx option was used or NO_REGS.") + (define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]" - "@internal") + "VSX vector register to hold vector double data or NO_REGS.") -;; vector float (V4SF) (define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]" - "@internal") + "VSX vector register to hold vector float data or NO_REGS.") + +(define_register_constraint "wg" "rs6000_constraints[RS6000_CONSTRAINT_wg]" + "If -mmfpgpr was used, a floating point register or NO_REGS.") + +(define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]" + "Floating point register if the LFIWAX instruction is enabled or NO_REGS.") + +(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]" + "VSX register if direct move instructions are enabled, or NO_REGS.") + +;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use +;; direct move directly, and movsf 
can't to move between the register sets. +;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode +(define_register_constraint "wn" "NO_REGS" "No register (NO_REGS).") + +(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]" + "General purpose register if 64-bit instructions are enabled or NO_REGS.") -;; scalar double (DF) (define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]" - "@internal") + "VSX vector register to hold scalar double values or NO_REGS.") -;; any VSX register -(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]" - "@internal") +(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]" + "VSX vector register to hold 128 bit integer or NO_REGS.") + +(define_register_constraint "wu" "rs6000_constraints[RS6000_CONSTRAINT_wu]" + "Altivec register to use for float/32-bit int loads/stores or NO_REGS.") + +(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]" + "Altivec register to use for double loads/stores or NO_REGS.") + +(define_register_constraint "ww" "rs6000_constraints[RS6000_CONSTRAINT_ww]" + "FP or VSX register to perform float operations under -mvsx or NO_REGS.") + +(define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]" + "Floating point register if the STFIWX instruction is enabled or NO_REGS.") + +(define_register_constraint "wy" "rs6000_constraints[RS6000_CONSTRAINT_wy]" + "VSX vector register to hold scalar float values or NO_REGS.") + +(define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]" + "Floating point register if the LFIWZX instruction is enabled or NO_REGS.") + +;; Lq/stq validates the address for load/store quad +(define_memory_constraint "wQ" + "Memory operand suitable for the load/store quad instructions" + (match_operand 0 "quad_memory_operand")) ;; Altivec style load/store that ignores the bottom bits of the address (define_memory_constraint "wZ" diff --git 
a/gcc/config/rs6000/crypto.md b/gcc/config/rs6000/crypto.md new file mode 100644 index 000000000000..9f7e4a1b255c --- /dev/null +++ b/gcc/config/rs6000/crypto.md @@ -0,0 +1,101 @@ +;; Cryptographic instructions added in ISA 2.07 +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Michael Meissner (meissner@linux.vnet.ibm.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_c_enum "unspec" + [UNSPEC_VCIPHER + UNSPEC_VNCIPHER + UNSPEC_VCIPHERLAST + UNSPEC_VNCIPHERLAST + UNSPEC_VSBOX + UNSPEC_VSHASIGMA + UNSPEC_VPERMXOR + UNSPEC_VPMSUM]) + +;; Iterator for VPMSUM/VPERMXOR +(define_mode_iterator CR_mode [V16QI V8HI V4SI V2DI]) + +(define_mode_attr CR_char [(V16QI "b") + (V8HI "h") + (V4SI "w") + (V2DI "d")]) + +;; Iterator for VSHASIGMAD/VSHASIGMAW +(define_mode_iterator CR_hash [V4SI V2DI]) + +;; Iterator for the other crypto functions +(define_int_iterator CR_code [UNSPEC_VCIPHER + UNSPEC_VNCIPHER + UNSPEC_VCIPHERLAST + UNSPEC_VNCIPHERLAST]) + +(define_int_attr CR_insn [(UNSPEC_VCIPHER "vcipher") + (UNSPEC_VNCIPHER "vncipher") + (UNSPEC_VCIPHERLAST "vcipherlast") + (UNSPEC_VNCIPHERLAST "vncipherlast")]) + +;; 2 operand crypto instructions +(define_insn "crypto_" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + CR_code))] + 
"TARGET_CRYPTO" + " %0,%1,%2" + [(set_attr "type" "crypto")]) + +(define_insn "crypto_vpmsum" + [(set (match_operand:CR_mode 0 "register_operand" "=v") + (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") + (match_operand:CR_mode 2 "register_operand" "v")] + UNSPEC_VPMSUM))] + "TARGET_CRYPTO" + "vpmsum %0,%1,%2" + [(set_attr "type" "crypto")]) + +;; 3 operand crypto instructions +(define_insn "crypto_vpermxor_" + [(set (match_operand:CR_mode 0 "register_operand" "=v") + (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") + (match_operand:CR_mode 2 "register_operand" "v") + (match_operand:CR_mode 3 "register_operand" "v")] + UNSPEC_VPERMXOR))] + "TARGET_CRYPTO" + "vpermxor %0,%1,%2,%3" + [(set_attr "type" "crypto")]) + +;; 1 operand crypto instruction +(define_insn "crypto_vsbox" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")] + UNSPEC_VSBOX))] + "TARGET_CRYPTO" + "vsbox %0,%1" + [(set_attr "type" "crypto")]) + +;; Hash crypto instructions +(define_insn "crypto_vshasigma" + [(set (match_operand:CR_hash 0 "register_operand" "=v") + (unspec:CR_hash [(match_operand:CR_hash 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_1_operand" "n") + (match_operand:SI 3 "const_0_to_15_operand" "n")] + UNSPEC_VSHASIGMA))] + "TARGET_CRYPTO" + "vshasigma %0,%1,%2,%3" + [(set_attr "type" "crypto")]) diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md index 040bae490013..0c50c782aa9f 100644 --- a/gcc/config/rs6000/dfp.md +++ b/gcc/config/rs6000/dfp.md @@ -29,77 +29,6 @@ ]) -(define_expand "movsd" - [(set (match_operand:SD 0 "nonimmediate_operand" "") - (match_operand:SD 1 "any_operand" ""))] - "TARGET_HARD_FLOAT && TARGET_FPRS" - "{ rs6000_emit_move (operands[0], operands[1], SDmode); DONE; }") - -(define_split - [(set (match_operand:SD 0 "gpc_reg_operand" "") - (match_operand:SD 1 "const_double_operand" ""))] - "reload_completed - && ((GET_CODE (operands[0]) == 
REG && REGNO (operands[0]) <= 31) - || (GET_CODE (operands[0]) == SUBREG - && GET_CODE (SUBREG_REG (operands[0])) == REG - && REGNO (SUBREG_REG (operands[0])) <= 31))" - [(set (match_dup 2) (match_dup 3))] - " -{ - long l; - REAL_VALUE_TYPE rv; - - REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]); - REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l); - - if (! TARGET_POWERPC64) - operands[2] = operand_subword (operands[0], 0, 0, SDmode); - else - operands[2] = gen_lowpart (SImode, operands[0]); - - operands[3] = gen_int_mode (l, SImode); -}") - -(define_insn "movsd_hardfloat" - [(set (match_operand:SD 0 "nonimmediate_operand" "=r,r,m,f,*c*l,!r,*h,!r,!r") - (match_operand:SD 1 "input_operand" "r,m,r,f,r,h,0,G,Fn"))] - "(gpc_reg_operand (operands[0], SDmode) - || gpc_reg_operand (operands[1], SDmode)) - && (TARGET_HARD_FLOAT && TARGET_FPRS)" - "@ - mr %0,%1 - lwz%U1%X1 %0,%1 - stw%U0%X0 %1,%0 - fmr %0,%1 - mt%0 %1 - mf%1 %0 - nop - # - #" - [(set_attr "type" "*,load,store,fp,mtjmpr,mfjmpr,*,*,*") - (set_attr "length" "4,4,4,4,4,4,4,4,8")]) - -(define_insn "movsd_softfloat" - [(set (match_operand:SD 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,r,*h") - (match_operand:SD 1 "input_operand" "r,r,h,m,r,I,L,R,G,Fn,0"))] - "(gpc_reg_operand (operands[0], SDmode) - || gpc_reg_operand (operands[1], SDmode)) - && (TARGET_SOFT_FLOAT || !TARGET_FPRS)" - "@ - mr %0,%1 - mt%0 %1 - mf%1 %0 - lwz%U1%X1 %0,%1 - stw%U0%X0 %1,%0 - li %0,%1 - lis %0,%v1 - la %0,%a1 - # - # - nop" - [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*,*") - (set_attr "length" "4,4,4,4,4,4,4,4,4,8,4")]) - (define_insn "movsd_store" [(set (match_operand:DD 0 "nonimmediate_operand" "=m") (unspec:DD [(match_operand:SD 1 "input_operand" "d")] @@ -108,7 +37,14 @@ || gpc_reg_operand (operands[1], SDmode)) && TARGET_HARD_FLOAT && TARGET_FPRS" "stfd%U0%X0 %1,%0" - [(set_attr "type" "fpstore") + [(set (attr "type") + (if_then_else + (match_test "update_indexed_address_mem (operands[0], VOIDmode)") + (const_string 
"fpstore_ux") + (if_then_else + (match_test "update_address_mem (operands[0], VOIDmode)") + (const_string "fpstore_u") + (const_string "fpstore")))) (set_attr "length" "4")]) (define_insn "movsd_load" @@ -119,7 +55,14 @@ || gpc_reg_operand (operands[1], DDmode)) && TARGET_HARD_FLOAT && TARGET_FPRS" "lfd%U1%X1 %0,%1" - [(set_attr "type" "fpload") + [(set (attr "type") + (if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "fpload_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "fpload_u") + (const_string "fpload")))) (set_attr "length" "4")]) ;; Hardware support for decimal floating point operations. @@ -182,211 +125,6 @@ "fnabs %0,%1" [(set_attr "type" "fp")]) -(define_expand "movdd" - [(set (match_operand:DD 0 "nonimmediate_operand" "") - (match_operand:DD 1 "any_operand" ""))] - "" - "{ rs6000_emit_move (operands[0], operands[1], DDmode); DONE; }") - -(define_split - [(set (match_operand:DD 0 "gpc_reg_operand" "") - (match_operand:DD 1 "const_int_operand" ""))] - "! TARGET_POWERPC64 && reload_completed - && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31) - || (GET_CODE (operands[0]) == SUBREG - && GET_CODE (SUBREG_REG (operands[0])) == REG - && REGNO (SUBREG_REG (operands[0])) <= 31))" - [(set (match_dup 2) (match_dup 4)) - (set (match_dup 3) (match_dup 1))] - " -{ - int endian = (WORDS_BIG_ENDIAN == 0); - HOST_WIDE_INT value = INTVAL (operands[1]); - - operands[2] = operand_subword (operands[0], endian, 0, DDmode); - operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode); -#if HOST_BITS_PER_WIDE_INT == 32 - operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx; -#else - operands[4] = GEN_INT (value >> 32); - operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000); -#endif -}") - -(define_split - [(set (match_operand:DD 0 "gpc_reg_operand" "") - (match_operand:DD 1 "const_double_operand" ""))] - "! 
TARGET_POWERPC64 && reload_completed - && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31) - || (GET_CODE (operands[0]) == SUBREG - && GET_CODE (SUBREG_REG (operands[0])) == REG - && REGNO (SUBREG_REG (operands[0])) <= 31))" - [(set (match_dup 2) (match_dup 4)) - (set (match_dup 3) (match_dup 5))] - " -{ - int endian = (WORDS_BIG_ENDIAN == 0); - long l[2]; - REAL_VALUE_TYPE rv; - - REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]); - REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l); - - operands[2] = operand_subword (operands[0], endian, 0, DDmode); - operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode); - operands[4] = gen_int_mode (l[endian], SImode); - operands[5] = gen_int_mode (l[1 - endian], SImode); -}") - -(define_split - [(set (match_operand:DD 0 "gpc_reg_operand" "") - (match_operand:DD 1 "const_double_operand" ""))] - "TARGET_POWERPC64 && reload_completed - && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31) - || (GET_CODE (operands[0]) == SUBREG - && GET_CODE (SUBREG_REG (operands[0])) == REG - && REGNO (SUBREG_REG (operands[0])) <= 31))" - [(set (match_dup 2) (match_dup 3))] - " -{ - int endian = (WORDS_BIG_ENDIAN == 0); - long l[2]; - REAL_VALUE_TYPE rv; -#if HOST_BITS_PER_WIDE_INT >= 64 - HOST_WIDE_INT val; -#endif - - REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]); - REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l); - - operands[2] = gen_lowpart (DImode, operands[0]); - /* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */ -#if HOST_BITS_PER_WIDE_INT >= 64 - val = ((HOST_WIDE_INT)(unsigned long)l[endian] << 32 - | ((HOST_WIDE_INT)(unsigned long)l[1 - endian])); - - operands[3] = gen_int_mode (val, DImode); -#else - operands[3] = immed_double_const (l[1 - endian], l[endian], DImode); -#endif -}") - -;; Don't have reload use general registers to load a constant. 
First, -;; it might not work if the output operand is the equivalent of -;; a non-offsettable memref, but also it is less efficient than loading -;; the constant into an FP register, since it will probably be used there. -;; The "??" is a kludge until we can figure out a more reasonable way -;; of handling these non-offsettable values. -(define_insn "*movdd_hardfloat32" - [(set (match_operand:DD 0 "nonimmediate_operand" "=!r,??r,m,d,d,m,!r,!r,!r") - (match_operand:DD 1 "input_operand" "r,m,r,d,m,d,G,H,F"))] - "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS - && (gpc_reg_operand (operands[0], DDmode) - || gpc_reg_operand (operands[1], DDmode))" - "* -{ - switch (which_alternative) - { - default: - gcc_unreachable (); - case 0: - case 1: - case 2: - return \"#\"; - case 3: - return \"fmr %0,%1\"; - case 4: - return \"lfd%U1%X1 %0,%1\"; - case 5: - return \"stfd%U0%X0 %1,%0\"; - case 6: - case 7: - case 8: - return \"#\"; - } -}" - [(set_attr "type" "two,load,store,fp,fpload,fpstore,*,*,*") - (set_attr "length" "8,16,16,4,4,4,8,12,16")]) - -(define_insn "*movdd_softfloat32" - [(set (match_operand:DD 0 "nonimmediate_operand" "=r,r,m,r,r,r") - (match_operand:DD 1 "input_operand" "r,m,r,G,H,F"))] - "! TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS) - && (gpc_reg_operand (operands[0], DDmode) - || gpc_reg_operand (operands[1], DDmode))" - "#" - [(set_attr "type" "two,load,store,*,*,*") - (set_attr "length" "8,8,8,8,12,16")]) - -; ld/std require word-aligned displacements -> 'Y' constraint. -; List Y->r and r->Y before r->r for reload. 
-(define_insn "*movdd_hardfloat64_mfpgpr" - [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r,r,d") - (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F,d,r"))] - "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS - && (gpc_reg_operand (operands[0], DDmode) - || gpc_reg_operand (operands[1], DDmode))" - "@ - std%U0%X0 %1,%0 - ld%U1%X1 %0,%1 - mr %0,%1 - fmr %0,%1 - lfd%U1%X1 %0,%1 - stfd%U0%X0 %1,%0 - mt%0 %1 - mf%1 %0 - nop - # - # - # - mftgpr %0,%1 - mffgpr %0,%1" - [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr") - (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16,4,4")]) - -; ld/std require word-aligned displacements -> 'Y' constraint. -; List Y->r and r->Y before r->r for reload. -(define_insn "*movdd_hardfloat64" - [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r") - (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F"))] - "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS - && (gpc_reg_operand (operands[0], DDmode) - || gpc_reg_operand (operands[1], DDmode))" - "@ - std%U0%X0 %1,%0 - ld%U1%X1 %0,%1 - mr %0,%1 - fmr %0,%1 - lfd%U1%X1 %0,%1 - stfd%U0%X0 %1,%0 - mt%0 %1 - mf%1 %0 - nop - # - # - #" - [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*") - (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16")]) - -(define_insn "*movdd_softfloat64" - [(set (match_operand:DD 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h") - (match_operand:DD 1 "input_operand" "Y,r,r,r,h,G,H,F,0"))] - "TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS) - && (gpc_reg_operand (operands[0], DDmode) - || gpc_reg_operand (operands[1], DDmode))" - "@ - ld%U1%X1 %0,%1 - std%U0%X0 %1,%0 - mr %0,%1 - mt%0 %1 - mf%1 %0 - # - # - # - nop" - [(set_attr "type" "load,store,*,mtjmpr,mfjmpr,*,*,*,*") - (set_attr "length" "4,4,4,4,4,8,12,16,4")]) - (define_expand "negtd2" [(set (match_operand:TD 0 "gpc_reg_operand" 
"") (neg:TD (match_operand:TD 1 "gpc_reg_operand" "")))] @@ -423,27 +161,6 @@ "fnabs %0,%1" [(set_attr "type" "fp")]) -(define_expand "movtd" - [(set (match_operand:TD 0 "general_operand" "") - (match_operand:TD 1 "any_operand" ""))] - "TARGET_HARD_FLOAT && TARGET_FPRS" - "{ rs6000_emit_move (operands[0], operands[1], TDmode); DONE; }") - -; It's important to list the Y->r and r->Y moves before r->r because -; otherwise reload, given m->r, will try to pick r->r and reload it, -; which doesn't make progress. -(define_insn_and_split "*movtd_internal" - [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r") - (match_operand:TD 1 "input_operand" "d,m,d,r,YGHF,r"))] - "TARGET_HARD_FLOAT && TARGET_FPRS - && (gpc_reg_operand (operands[0], TDmode) - || gpc_reg_operand (operands[1], TDmode))" - "#" - "&& reload_completed" - [(pc)] -{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; } - [(set_attr "length" "8,8,8,20,20,16")]) - ;; Hardware support for decimal floating point operations. (define_insn "extendddtd2" diff --git a/gcc/config/rs6000/driver-rs6000.c b/gcc/config/rs6000/driver-rs6000.c index 2f4d1ead8f3c..1bc8ace2a5fb 100644 --- a/gcc/config/rs6000/driver-rs6000.c +++ b/gcc/config/rs6000/driver-rs6000.c @@ -167,7 +167,7 @@ elf_platform (void) if (fd != -1) { - char buf[1024]; + static char buf[1024]; ElfW(auxv_t) *av; ssize_t n; diff --git a/gcc/config/rs6000/power8.md b/gcc/config/rs6000/power8.md new file mode 100644 index 000000000000..83bf71974830 --- /dev/null +++ b/gcc/config/rs6000/power8.md @@ -0,0 +1,373 @@ +;; Scheduling description for IBM POWER8 processor. +;; Copyright (C) 2013 Free Software Foundation, Inc. +;; +;; Contributed by Pat Haugen (pthaugen@us.ibm.com). + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "power8fxu,power8lsu,power8vsu,power8misc") + +(define_cpu_unit "fxu0_power8,fxu1_power8" "power8fxu") +(define_cpu_unit "lu0_power8,lu1_power8" "power8lsu") +(define_cpu_unit "lsu0_power8,lsu1_power8" "power8lsu") +(define_cpu_unit "vsu0_power8,vsu1_power8" "power8vsu") +(define_cpu_unit "bpu_power8,cru_power8" "power8misc") +(define_cpu_unit "du0_power8,du1_power8,du2_power8,du3_power8,du4_power8,\ + du5_power8,du6_power8" "power8misc") + + +; Dispatch group reservations +(define_reservation "DU_any_power8" + "du0_power8|du1_power8|du2_power8|du3_power8|du4_power8|\ + du5_power8") + +; 2-way Cracked instructions go in slots 0-1 +; (can also have a second in slots 3-4 if insns are adjacent) +(define_reservation "DU_cracked_power8" + "du0_power8+du1_power8") + +; Insns that are first in group +(define_reservation "DU_first_power8" + "du0_power8") + +; Insns that are first and last in group +(define_reservation "DU_both_power8" + "du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+\ + du5_power8+du6_power8") + +; Dispatch slots are allocated in order conforming to program order. 
+(absence_set "du0_power8" "du1_power8,du2_power8,du3_power8,du4_power8,\ + du5_power8,du6_power8") +(absence_set "du1_power8" "du2_power8,du3_power8,du4_power8,du5_power8,\ + du6_power8") +(absence_set "du2_power8" "du3_power8,du4_power8,du5_power8,du6_power8") +(absence_set "du3_power8" "du4_power8,du5_power8,du6_power8") +(absence_set "du4_power8" "du5_power8,du6_power8") +(absence_set "du5_power8" "du6_power8") + + +; Execution unit reservations +(define_reservation "FXU_power8" + "fxu0_power8|fxu1_power8") + +(define_reservation "LU_power8" + "lu0_power8|lu1_power8") + +(define_reservation "LSU_power8" + "lsu0_power8|lsu1_power8") + +(define_reservation "LU_or_LSU_power8" + "lu0_power8|lu1_power8|lsu0_power8|lsu1_power8") + +(define_reservation "VSU_power8" + "vsu0_power8|vsu1_power8") + + +; LS Unit +(define_insn_reservation "power8-load" 3 + (and (eq_attr "type" "load") + (eq_attr "cpu" "power8")) + "DU_any_power8,LU_or_LSU_power8") + +(define_insn_reservation "power8-load-update" 3 + (and (eq_attr "type" "load_u,load_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_or_LSU_power8+FXU_power8") + +(define_insn_reservation "power8-load-ext" 3 + (and (eq_attr "type" "load_ext") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_or_LSU_power8,FXU_power8") + +(define_insn_reservation "power8-load-ext-update" 3 + (and (eq_attr "type" "load_ext_u,load_ext_ux") + (eq_attr "cpu" "power8")) + "DU_both_power8,LU_or_LSU_power8+FXU_power8,FXU_power8") + +(define_insn_reservation "power8-fpload" 5 + (and (eq_attr "type" "fpload,vecload") + (eq_attr "cpu" "power8")) + "DU_any_power8,LU_power8") + +(define_insn_reservation "power8-fpload-update" 5 + (and (eq_attr "type" "fpload_u,fpload_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_power8+FXU_power8") + +(define_insn_reservation "power8-store" 5 ; store-forwarding latency + (and (eq_attr "type" "store,store_u") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+LU_power8") + +(define_insn_reservation 
"power8-store-update-indexed" 5 + (and (eq_attr "type" "store_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-fpstore" 5 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-fpstore-update" 5 + (and (eq_attr "type" "fpstore_u,fpstore_ux") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-vecstore" 5 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-larx" 3 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power8")) + "DU_both_power8,LU_or_LSU_power8") + +(define_insn_reservation "power8-stcx" 10 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power8")) + "DU_both_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-sync" 1 + (and (eq_attr "type" "sync,isync") + (eq_attr "cpu" "power8")) + "DU_both_power8,LSU_power8") + + +; FX Unit +(define_insn_reservation "power8-1cyc" 1 + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,exts,isel") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 2 "power8-1cyc" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") +; "power8-load,power8-load-update,power8-load-ext,\ +; power8-load-ext-update,power8-fpload,power8-fpload-update,\ +; power8-store,power8-store-update,power8-store-update-indexed,\ +; power8-fpstore,power8-fpstore-update,power8-vecstore,\ +; power8-larx,power8-stcx") + +(define_insn_reservation "power8-2cyc" 2 + (and (eq_attr "type" "cntlz,popcnt") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +(define_insn_reservation "power8-two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power8")) + "DU_any_power8+DU_any_power8,FXU_power8,FXU_power8") + 
+(define_insn_reservation "power8-three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power8")) + "DU_any_power8+DU_any_power8+DU_any_power8,FXU_power8,FXU_power8,FXU_power8") + +; cmp - Normal compare insns +(define_insn_reservation "power8-cmp" 2 + (and (eq_attr "type" "cmp") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; fast_compare : add./and./nor./etc +(define_insn_reservation "power8-fast-compare" 2 + (and (eq_attr "type" "fast_compare") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; compare : rldicl./exts./etc +; delayed_compare : rlwinm./slwi./etc +; var_delayed_compare : rlwnm./slw./etc +(define_insn_reservation "power8-compare" 2 + (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,FXU_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 3 "power8-fast-compare,power8-compare" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") + +; 5 cycle CR latency +(define_bypass 5 "power8-fast-compare,power8-compare" + "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch") + +(define_insn_reservation "power8-mul" 4 + (and (eq_attr "type" "imul,imul2,imul3,lmul") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +(define_insn_reservation "power8-mul-compare" 4 + (and (eq_attr "type" "imul_compare,lmul_compare") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 5 "power8-mul,power8-mul-compare" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") + +; 7 cycle CR latency +(define_bypass 7 "power8-mul,power8-mul-compare" + "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch") + +; FXU divides are not pipelined +(define_insn_reservation "power8-idiv" 37 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,fxu0_power8*37|fxu1_power8*37") + 
+(define_insn_reservation "power8-ldiv" 68 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,fxu0_power8*68|fxu1_power8*68") + +(define_insn_reservation "power8-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "power8")) + "DU_first_power8,FXU_power8") + +; Should differentiate between 1 cr field and > 1 since mtocrf is not microcode +(define_insn_reservation "power8-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power8")) + "DU_both_power8,FXU_power8") + + +; CR Unit +(define_insn_reservation "power8-mfjmpr" 5 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8+FXU_power8") + +(define_insn_reservation "power8-crlogical" 3 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8") + +(define_insn_reservation "power8-mfcr" 5 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power8")) + "DU_both_power8,cru_power8") + +(define_insn_reservation "power8-mfcrf" 3 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8") + + +; BR Unit +; Branches take dispatch slot 7, but reserve any remaining prior slots to +; prevent other insns from grabbing them once this is assigned. 
+(define_insn_reservation "power8-branch" 3 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power8")) + "(du6_power8\ + |du5_power8+du6_power8\ + |du4_power8+du5_power8+du6_power8\ + |du3_power8+du4_power8+du5_power8+du6_power8\ + |du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\ + |du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\ + |du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+\ + du6_power8),bpu_power8") + +; Branch updating LR/CTR feeding mf[lr|ctr] +(define_bypass 4 "power8-branch" "power8-mfjmpr") + + +; VS Unit (includes FP/VSX/VMX/DFP/Crypto) +(define_insn_reservation "power8-fp" 6 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +; Additional 3 cycles for any CR result +(define_bypass 9 "power8-fp" "power8-crlogical,power8-mfcr*,power8-branch") + +(define_insn_reservation "power8-fpcompare" 8 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-sdiv" 27 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-sqrt" 32 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-dsqrt" 44 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecsimple" 2 + (and (eq_attr "type" "vecperm,vecsimple,veccmp") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecnormal" 6 + (and (eq_attr "type" "vecfloat,vecdouble") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_bypass 7 "power8-vecnormal" + "power8-vecsimple,power8-veccomplex,power8-fpstore*,\ + power8-vecstore") + +(define_insn_reservation 
"power8-veccomplex" 7 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecfdiv" 25 + (and (eq_attr "type" "vecfdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecdiv" 31 + (and (eq_attr "type" "vecdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-mffgpr" 5 + (and (eq_attr "type" "mffgpr") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-mftgpr" 6 + (and (eq_attr "type" "mftgpr") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-crypto" 7 + (and (eq_attr "type" "crypto") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 07944db10895..c69f5d6b2609 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -166,6 +166,11 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) +;; Match op = 0..15 +(define_predicate "const_0_to_15_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) + ;; Return 1 if op is a register that is not special. (define_predicate "gpc_reg_operand" (match_operand 0 "register_operand") @@ -182,9 +187,68 @@ if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op))) return 1; + if (TARGET_VSX && VSX_REGNO_P (REGNO (op))) + return 1; + return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op)); }) +;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't +;; allow floating point or vector registers. 
+(define_predicate "int_reg_operand" + (match_operand 0 "register_operand") +{ + if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode)) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return INT_REGNO_P (REGNO (op)); +}) + +;; Like int_reg_operand, but only return true for base registers +(define_predicate "base_reg_operand" + (match_operand 0 "int_reg_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + return (REGNO (op) != FIRST_GPR_REGNO); +}) + +;; Return 1 if op is a general purpose register that is an even register +;; which is suitable for a load/store quad operation +(define_predicate "quad_int_reg_operand" + (match_operand 0 "register_operand") +{ + HOST_WIDE_INT r; + + if (!TARGET_QUAD_MEMORY) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + r = REGNO (op); + if (r >= FIRST_PSEUDO_REGISTER) + return 1; + + return (INT_REGNO_P (r) && ((r & 1) == 0)); +}) + ;; Return 1 if op is a register that is a condition register field. (define_predicate "cc_reg_operand" (match_operand 0 "register_operand") @@ -315,6 +379,11 @@ && CONST_DOUBLE_HIGH (op) == 0") (match_operand 0 "gpc_reg_operand")))) +;; Like reg_or_logical_cint_operand, but allow vsx registers +(define_predicate "vsx_reg_or_cint_operand" + (ior (match_operand 0 "vsx_register_operand") + (match_operand 0 "reg_or_logical_cint_operand"))) + ;; Return 1 if operand is a CONST_DOUBLE that can be set in a register ;; with no more than one instruction per word. (define_predicate "easy_fp_constant" @@ -333,6 +402,11 @@ && mode != DImode) return 1; + /* The constant 0.0 is easy under VSX. 
*/ + if ((mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode) + && VECTOR_UNIT_VSX_P (DFmode) && op == CONST0_RTX (mode)) + return 1; + if (DECIMAL_FLOAT_MODE_P (mode)) return 0; @@ -521,6 +595,54 @@ (and (match_operand 0 "memory_operand") (match_test "offsettable_nonstrict_memref_p (op)"))) +;; Return 1 if the operand is suitable for load/store quad memory. +(define_predicate "quad_memory_operand" + (match_code "mem") +{ + rtx addr, op0, op1; + int ret; + + if (!TARGET_QUAD_MEMORY) + ret = 0; + + else if (!memory_operand (op, mode)) + ret = 0; + + else if (GET_MODE_SIZE (GET_MODE (op)) != 16) + ret = 0; + + else if (MEM_ALIGN (op) < 128) + ret = 0; + + else + { + addr = XEXP (op, 0); + if (int_reg_operand (addr, Pmode)) + ret = 1; + + else if (GET_CODE (addr) != PLUS) + ret = 0; + + else + { + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + ret = (int_reg_operand (op0, Pmode) + && GET_CODE (op1) == CONST_INT + && IN_RANGE (INTVAL (op1), -32768, 32767) + && (INTVAL (op1) & 15) == 0); + } + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false"); + debug_rtx (op); + } + + return ret; +}) + ;; Return 1 if the operand is an indexed or indirect memory operand. (define_predicate "indexed_or_indirect_operand" (match_code "mem") @@ -535,6 +657,19 @@ return indexed_or_indirect_address (op, mode); }) +;; Like indexed_or_indirect_operand, but also allow a GPR register if direct +;; moves are supported. 
+(define_predicate "reg_or_indexed_operand" + (match_code "mem,reg") +{ + if (MEM_P (op)) + return indexed_or_indirect_operand (op, mode); + else if (TARGET_DIRECT_MOVE) + return register_operand (op, mode); + return + 0; +}) + ;; Return 1 if the operand is an indexed or indirect memory operand with an ;; AND -16 in it, used to recognize when we need to switch to Altivec loads ;; to realign loops instead of VSX (altivec silently ignores the bottom bits, @@ -560,6 +695,28 @@ && REG_P (XEXP (op, 1)))") (match_operand 0 "address_operand"))) +;; Return 1 if the operand is an index-form address. +(define_special_predicate "indexed_address" + (match_test "(GET_CODE (op) == PLUS + && REG_P (XEXP (op, 0)) + && REG_P (XEXP (op, 1)))")) + +;; Return 1 if the operand is a MEM with an update-form address. This may +;; also include update-indexed form. +(define_special_predicate "update_address_mem" + (match_test "(MEM_P (op) + && (GET_CODE (XEXP (op, 0)) == PRE_INC + || GET_CODE (XEXP (op, 0)) == PRE_DEC + || GET_CODE (XEXP (op, 0)) == PRE_MODIFY))")) + +;; Return 1 if the operand is a MEM with an update-indexed-form address.  Note +;; that PRE_INC/PRE_DEC will always be non-indexed (i.e. non X-form) since the +;; increment is based on the mode size and will therefore always be a const. +(define_special_predicate "update_indexed_address_mem" + (match_test "(MEM_P (op) + && GET_CODE (XEXP (op, 0)) == PRE_MODIFY + && indexed_address (XEXP (XEXP (op, 0), 1), mode))")) + ;; Used for the destination of the fix_truncdfsi2 expander. ;; If stfiwx will be used, the result goes to memory; otherwise, ;; we're going to emit a store and a load of a subreg, so the dest is a @@ -1534,3 +1691,99 @@ return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL; }) + +;; Match the first insn (addis) in fusing the combination of addis and loads to +;; GPR registers on power8. 
+(define_predicate "fusion_gpr_addis" + (match_code "const_int,high,plus") +{ + HOST_WIDE_INT value; + rtx int_const; + + if (GET_CODE (op) == HIGH) + return 1; + + if (CONST_INT_P (op)) + int_const = op; + + else if (GET_CODE (op) == PLUS + && base_reg_operand (XEXP (op, 0), Pmode) + && CONST_INT_P (XEXP (op, 1))) + int_const = XEXP (op, 1); + + else + return 0; + + /* Power8 currently will only do the fusion if the top 11 bits of the addis + value are all 1's or 0's. */ + value = INTVAL (int_const); + if ((value & (HOST_WIDE_INT)0xffff) != 0) + return 0; + + if ((value & (HOST_WIDE_INT)0xffff0000) == 0) + return 0; + + return (IN_RANGE (value >> 16, -32, 31)); +}) + +;; Match the second insn (lbz, lhz, lwz, ld) in fusing the combination of addis +;; and loads to GPR registers on power8. +(define_predicate "fusion_gpr_mem_load" + (match_code "mem,sign_extend,zero_extend") +{ + rtx addr; + + /* Handle sign/zero extend. */ + if (GET_CODE (op) == ZERO_EXTEND + || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND)) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + if (!MEM_P (op)) + return 0; + + switch (mode) + { + case QImode: + case HImode: + case SImode: + break; + + case DImode: + if (!TARGET_POWERPC64) + return 0; + break; + + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) == PLUS) + { + rtx base = XEXP (addr, 0); + rtx offset = XEXP (addr, 1); + + return (base_reg_operand (base, GET_MODE (base)) + && satisfies_constraint_I (offset)); + } + + else if (GET_CODE (addr) == LO_SUM) + { + rtx base = XEXP (addr, 0); + rtx offset = XEXP (addr, 1); + + if (!base_reg_operand (base, GET_MODE (base))) + return 0; + + else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); + + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); + } + + return 0; +}) diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 
a545fe3e448e..971495ff47b3 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -30,7 +30,7 @@ RS6000_BUILTIN_A -- ABS builtins RS6000_BUILTIN_D -- DST builtins RS6000_BUILTIN_E -- SPE EVSEL builtins. - RS6000_BUILTIN_P -- Altivec and VSX predicate builtins + RS6000_BUILTIN_P -- Altivec, VSX, ISA 2.07 vector predicate builtins RS6000_BUILTIN_Q -- Paired floating point VSX predicate builtins RS6000_BUILTIN_S -- SPE predicate builtins RS6000_BUILTIN_X -- special builtins @@ -301,6 +301,108 @@ | RS6000_BTC_SPECIAL), \ CODE_FOR_nothing) /* ICODE */ +/* ISA 2.07 (power8) vector convenience macros. */ +/* For the instructions that are encoded as altivec instructions use + __builtin_altivec_ as the builtin name. */ +#define BU_P8V_AV_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* For the instructions encoded as VSX instructions use __builtin_vsx as the + builtin name. 
*/ +#define BU_P8V_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P8V_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +/* Crypto convenience macros. */ +#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define 
BU_CRYPTO_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + /* SPE convenience macros. */ #define BU_SPE_1(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_1 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ @@ -1012,7 +1114,7 @@ BU_VSX_1 (XVTSQRTSP_FG, "xvtsqrtsp_fg", CONST, vsx_tsqrtv4sf2_fg) BU_VSX_1 (XVRESP, "xvresp", CONST, vsx_frev4sf2) BU_VSX_1 (XSCVDPSP, "xscvdpsp", CONST, vsx_xscvdpsp) -BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvdpsp) +BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvspdp) BU_VSX_1 (XVCVDPSP, "xvcvdpsp", CONST, vsx_xvcvdpsp) BU_VSX_1 (XVCVSPDP, "xvcvspdp", CONST, vsx_xvcvspdp) BU_VSX_1 (XSTSQRTDP_FE, "xstsqrtdp_fe", CONST, vsx_tsqrtdf2_fe) @@ -1052,9 +1154,9 @@ BU_VSX_1 (XVRSPIZ, "xvrspiz", CONST, vsx_btruncv4sf2) BU_VSX_1 (XSRDPI, "xsrdpi", CONST, vsx_xsrdpi) BU_VSX_1 (XSRDPIC, "xsrdpic", CONST, vsx_xsrdpic) -BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, vsx_floordf2) -BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, vsx_ceildf2) -BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, vsx_btruncdf2) +BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, floordf2) +BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, ceildf2) +BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, btruncdf2) /* VSX predicate functions. */ BU_VSX_P (XVCMPEQSP_P, "xvcmpeqsp_p", CONST, vector_eq_v4sf_p) @@ -1131,6 +1233,139 @@ BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw") /* VSX builtins that are handled as special cases. */ BU_VSX_OVERLOAD_X (LD, "ld") BU_VSX_OVERLOAD_X (ST, "st") + +/* 1 argument VSX instructions added in ISA 2.07. 
*/ +BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn) +BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn) + +/* 1 argument altivec instructions added in ISA 2.07. */ +BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2) +BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw) +BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw) +BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2) +BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2) +BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2) +BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2) +BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2) +BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2) +BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2) +BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2) +BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd) + +/* 2 argument altivec instructions added in ISA 2.07. */ +BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3) +BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3) +BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3) +BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3) +BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3) +BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew) +BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow) +BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum) +BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss) +BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus) +BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpkswus) +BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3) +BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3) +BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3) +BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3) +BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3) + +BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3) +BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3) +BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3) +BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3) +BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3) +BU_P8V_AV_2 
(EQV_V2DF, "eqv_v2df", CONST, eqvv2df3) + +BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3) +BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3) +BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3) +BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3) +BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3) +BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3) + +BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3) +BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3) +BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3) +BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3) +BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3) +BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3) + +/* Vector comparison instructions added in ISA 2.07. */ +BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di) +BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di) +BU_P8V_AV_2 (VCMPGTUD, "vcmpgtud", CONST, vector_gtuv2di) + +/* Vector comparison predicate instructions added in ISA 2.07. */ +BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p) +BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p) +BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p) + +/* ISA 2.07 vector overloaded 1 argument functions. */ +BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw") +BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw") +BU_P8V_OVERLOAD_1 (VCLZ, "vclz") +BU_P8V_OVERLOAD_1 (VCLZB, "vclzb") +BU_P8V_OVERLOAD_1 (VCLZH, "vclzh") +BU_P8V_OVERLOAD_1 (VCLZW, "vclzw") +BU_P8V_OVERLOAD_1 (VCLZD, "vclzd") +BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt") +BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb") +BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth") +BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw") +BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd") +BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd") + +/* ISA 2.07 vector overloaded 2 argument functions. 
*/ +BU_P8V_OVERLOAD_2 (EQV, "eqv") +BU_P8V_OVERLOAD_2 (NAND, "nand") +BU_P8V_OVERLOAD_2 (ORC, "orc") +BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm") +BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd") +BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud") +BU_P8V_OVERLOAD_2 (VMINSD, "vminsd") +BU_P8V_OVERLOAD_2 (VMINUD, "vminud") +BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew") +BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow") +BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss") +BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus") +BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum") +BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus") +BU_P8V_OVERLOAD_2 (VRLD, "vrld") +BU_P8V_OVERLOAD_2 (VSLD, "vsld") +BU_P8V_OVERLOAD_2 (VSRAD, "vsrad") +BU_P8V_OVERLOAD_2 (VSRD, "vsrd") +BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm") + + +/* 1 argument crypto functions. */ +BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox) + +/* 2 argument crypto functions. */ +BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher) +BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast) +BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher) +BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast) +BU_CRYPTO_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_CRYPTO_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_CRYPTO_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_CRYPTO_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) + +/* 3 argument crypto functions. */ +BU_CRYPTO_3 (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di) +BU_CRYPTO_3 (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si) +BU_CRYPTO_3 (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi) +BU_CRYPTO_3 (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi) +BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw) +BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad) + +/* 2 argument crypto overloaded functions. */ +BU_CRYPTO_OVERLOAD_2 (VPMSUM, "vpmsum") + +/* 3 argument crypto overloaded functions. 
*/ +BU_CRYPTO_OVERLOAD_3 (VPERMXOR, "vpermxor") +BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma") + /* 3 argument paired floating point builtins. */ BU_PAIRED_3 (MSUB, "msub", FP, fmsv2sf4) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index a4f66ba8f1bd..562fca3724a6 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -315,6 +315,8 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X"); if ((flags & OPTION_MASK_POPCNTD) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); + if ((flags & OPTION_MASK_DIRECT_MOVE) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8"); if ((flags & OPTION_MASK_SOFT_FLOAT) != 0) rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT"); if ((flags & OPTION_MASK_RECIP_PRECISION) != 0) @@ -331,6 +333,10 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, } if ((flags & OPTION_MASK_VSX) != 0) rs6000_define_or_undefine_macro (define_p, "__VSX__"); + if ((flags & OPTION_MASK_P8_VECTOR) != 0) + rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__"); + if ((flags & OPTION_MASK_CRYPTO) != 0) + rs6000_define_or_undefine_macro (define_p, "__CRYPTO__"); /* options from the builtin masks. 
*/ if ((bu_mask & RS6000_BTM_SPE) != 0) @@ -505,6 +511,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABS, P8V_BUILTIN_ABS_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP, @@ -577,12 +585,24 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX, RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX, @@ -601,6 +621,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { 
ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, @@ -651,6 +675,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP, @@ -937,6 +973,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, 
P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP, @@ -975,6 +1015,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP, @@ -1021,6 +1065,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTSD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP, @@ -1418,6 +1466,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW, 
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP, @@ -1604,6 +1664,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP, RS6000_BTI_V4SF, 
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP, @@ -1786,6 +1858,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, @@ -1812,6 +1890,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUS, ALTIVEC_BUILTIN_VPKUWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKUDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHSS, ALTIVEC_BUILTIN_VPKSHSS, RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUHUS, ALTIVEC_BUILTIN_VPKUHUS, @@ -1824,6 +1906,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, + 
RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS, @@ -1844,6 +1928,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, @@ -1868,6 +1956,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP, @@ -2032,6 +2124,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 
RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, @@ -2056,6 +2152,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, @@ -2196,6 +2296,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP, @@ -3327,6 +3439,20 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P, @@ -3372,11 +3498,455 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, 
RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + /* Power8 vector overloaded functions. 
*/ + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, 
RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { 
P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { 
P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VMINSD, 
P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, 
RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + + { 
P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, 0, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, 0, 0, 0 }, + + /* Crypto builtins. */ + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAW, + RS6000_BTI_unsigned_V4SI, 
RS6000_BTI_unsigned_V4SI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 } }; @@ -3824,7 +4394,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, && (desc->op2 == RS6000_BTI_NOT_OPAQUE || rs6000_builtin_type_compatible (types[1], desc->op2)) && (desc->op3 == RS6000_BTI_NOT_OPAQUE - || rs6000_builtin_type_compatible (types[2], desc->op3))) + || rs6000_builtin_type_compatible (types[2], desc->op3)) + && rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE) return altivec_build_resolved_builtin (args, n, desc); bad: diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 3f17c6f2395f..93714d2f1dfc 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -28,7 +28,7 @@ ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel, fre, fsqrt, etc. were no longer documented as optional. Group masks by server and embedded. */ -#define ISA_2_5_MASKS_EMBEDDED (ISA_2_2_MASKS \ +#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS \ | OPTION_MASK_CMPB \ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_PPC_GFXOPT \ @@ -38,12 +38,22 @@ /* For ISA 2.06, don't add ISEL, since in general it isn't a win, but altivec is a win so enable it. */ + /* OPTION_MASK_VSX_TIMODE should be set, but disable it for now until + PR 58587 is fixed. */ #define ISA_2_6_MASKS_EMBEDDED (ISA_2_5_MASKS_EMBEDDED | OPTION_MASK_POPCNTD) #define ISA_2_6_MASKS_SERVER (ISA_2_5_MASKS_SERVER \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_ALTIVEC \ | OPTION_MASK_VSX) +/* For now, don't provide an embedded version of ISA 2.07. 
*/ +#define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_CRYPTO \ + | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_QUAD_MEMORY) + #define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) /* Deal with ports that do not have -mstrict-align. */ @@ -60,7 +70,9 @@ /* Mask of all options to set the default isa flags based on -mcpu=. */ #define POWERPC_MASKS (OPTION_MASK_ALTIVEC \ | OPTION_MASK_CMPB \ + | OPTION_MASK_CRYPTO \ | OPTION_MASK_DFP \ + | OPTION_MASK_DIRECT_MOVE \ | OPTION_MASK_DLMZB \ | OPTION_MASK_FPRND \ | OPTION_MASK_ISEL \ @@ -68,15 +80,19 @@ | OPTION_MASK_MFPGPR \ | OPTION_MASK_MULHW \ | OPTION_MASK_NO_UPDATE \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ | OPTION_MASK_POPCNTB \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_POWERPC64 \ | OPTION_MASK_PPC_GFXOPT \ | OPTION_MASK_PPC_GPOPT \ + | OPTION_MASK_QUAD_MEMORY \ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_SOFT_FLOAT \ | OPTION_MASK_STRICT_ALIGN_OPTIONAL \ - | OPTION_MASK_VSX) + | OPTION_MASK_VSX \ + | OPTION_MASK_VSX_TIMODE) #endif @@ -166,10 +182,7 @@ RS6000_CPU ("power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD | MASK_VSX | MASK_RECIP_PRECISION) -RS6000_CPU ("power8", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ - POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF - | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - | MASK_VSX | MASK_RECIP_PRECISION) +RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0) RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64) RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64) diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index 
bc18f8a168c2..5124e1665d4f 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -41,3 +41,8 @@ VECTOR_MODE (INT, DI, 1); VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ + +/* Replacement for TImode that only is allowed in GPRs. We also use PTImode + for quad memory atomic operations to force getting an even/odd register + combination. */ +PARTIAL_INT_MODE (TI); diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h index fc843fd19cab..e143a4ca2037 100644 --- a/gcc/config/rs6000/rs6000-opts.h +++ b/gcc/config/rs6000/rs6000-opts.h @@ -59,7 +59,8 @@ enum processor_type PROCESSOR_POWER7, PROCESSOR_CELL, PROCESSOR_PPCA2, - PROCESSOR_TITAN + PROCESSOR_TITAN, + PROCESSOR_POWER8 }; /* FP processor type. */ @@ -131,11 +132,14 @@ enum rs6000_cmodel { CMODEL_LARGE }; -/* Describe which vector unit to use for a given machine mode. */ +/* Describe which vector unit to use for a given machine mode. The + VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and + P8_VECTOR are contiguous. 
*/ enum rs6000_vector { VECTOR_NONE, /* Type is not a vector or not supported */ VECTOR_ALTIVEC, /* Use altivec for vector processing */ VECTOR_VSX, /* Use VSX for vector processing */ + VECTOR_P8_VECTOR, /* Use ISA 2.07 VSX for vector processing */ VECTOR_PAIRED, /* Use paired floating point for vectors */ VECTOR_SPE, /* Use SPE for vector processing */ VECTOR_OTHER /* Some other vector unit */ diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d9bcf1a41ede..5c315470f34c 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -50,6 +50,7 @@ extern rtx rs6000_got_register (rtx); extern rtx find_addr_reg (rtx); extern rtx gen_easy_altivec_constant (rtx); extern const char *output_vec_const_move (rtx *); +extern const char *rs6000_output_move_128bit (rtx *); extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); @@ -70,6 +71,11 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx); extern int registers_ok_for_quad_peep (rtx, rtx); extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); +extern bool direct_move_p (rtx, rtx); +extern bool quad_load_store_p (rtx, rtx); +extern bool fusion_gpr_load_p (rtx *, bool); +extern void expand_fusion_gpr_load (rtx *); +extern const char *emit_fusion_gpr_load (rtx *); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, @@ -135,6 +141,7 @@ extern rtx rs6000_address_for_fpconvert (rtx); extern rtx rs6000_address_for_altivec (rtx); extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool); extern int rs6000_loop_align (rtx); +extern void rs6000_split_logical (rtx [], enum rtx_code, bool, bool, bool, rtx); #endif /* RTX_CODE */ #ifdef TREE_CODE diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 
0325870af97c..13333148411b 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -189,9 +189,6 @@ unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; /* Map register number to register class. */ enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER]; -/* Reload functions based on the type and the vector unit. */ -static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2]; - static int dbg_cost_ctrl; /* Built in types. */ @@ -289,6 +286,105 @@ static struct don't link in rs6000-c.c, so we can't call it directly. */ void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT); +/* Simplfy register classes into simpler classifications. We assume + GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range + check for standard register classes (gpr/floating/altivec/vsx) and + floating/vector classes (float/altivec/vsx). */ + +enum rs6000_reg_type { + NO_REG_TYPE, + PSEUDO_REG_TYPE, + GPR_REG_TYPE, + VSX_REG_TYPE, + ALTIVEC_REG_TYPE, + FPR_REG_TYPE, + SPR_REG_TYPE, + CR_REG_TYPE, + SPE_ACC_TYPE, + SPEFSCR_REG_TYPE +}; + +/* Map register class to register type. */ +static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; + +/* First/last register type for the 'normal' register types (i.e. general + purpose, floating point, altivec, and VSX registers). */ +#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE) + +#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE) + + +/* Register classes we care about in secondary reload or go if legitimate + address. We only need to worry about GPR, FPR, and Altivec registers here, + along an ANY field that is the OR of the 3 register classes. */ + +enum rs6000_reload_reg_type { + RELOAD_REG_GPR, /* General purpose registers. */ + RELOAD_REG_FPR, /* Traditional floating point regs. */ + RELOAD_REG_VMX, /* Altivec (VMX) registers. */ + RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. 
*/ + N_RELOAD_REG +}; + +/* For setting up register classes, loop through the 3 register classes mapping + into real registers, and skip the ANY class, which is just an OR of the + bits. */ +#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR +#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX + +/* Map reload register type to a register in the register class. */ +struct reload_reg_map_type { + const char *name; /* Register class name. */ + int reg; /* Register in the register class. */ +}; + +static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = { + { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */ + { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */ + { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */ + { "Any", -1 }, /* RELOAD_REG_ANY. */ +}; + +/* Mask bits for each register class, indexed per mode. Historically the + compiler has been more restrictive which types can do PRE_MODIFY instead of + PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */ +typedef unsigned char addr_mask_type; + +#define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */ +#define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */ +#define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */ +#define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */ +#define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */ +#define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */ + +/* Register type masks based on the type, of valid addressing modes. */ +struct rs6000_reg_addr { + enum insn_code reload_load; /* INSN to reload for loading. */ + enum insn_code reload_store; /* INSN to reload for storing. */ + enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */ + enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */ + enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */ + addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. 
*/ +}; + +static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES]; + +/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */ +static inline bool +mode_supports_pre_incdec_p (enum machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC) + != 0); +} + +/* Helper function to say whether a mode supports PRE_MODIFY. */ +static inline bool +mode_supports_pre_modify_p (enum machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY) + != 0); +} + /* Target cpu costs. */ @@ -828,6 +924,25 @@ struct processor_costs power7_cost = { 12, /* prefetch streams */ }; +/* Instruction costs on POWER8 processors. */ +static const +struct processor_costs power8_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (14), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 12, /* prefetch streams */ +}; + /* Instruction costs on POWER A2 processors. */ static const struct processor_costs ppca2_cost = { @@ -1020,6 +1135,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *, static void rs6000_print_builtin_options (FILE *, int, const char *, HOST_WIDE_INT); +static enum rs6000_reg_type register_to_reg_type (rtx, bool *); +static bool rs6000_secondary_reload_move (enum rs6000_reg_type, + enum rs6000_reg_type, + enum machine_mode, + secondary_reload_info *, + bool); + /* Hash table stuff for keeping track of TOC entries. */ struct GTY(()) toc_hash_struct @@ -1513,8 +1635,9 @@ rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode) { unsigned HOST_WIDE_INT reg_size; + /* TF/TD modes are special in that they always take 2 registers. 
*/ if (FP_REGNO_P (regno)) - reg_size = (VECTOR_MEM_VSX_P (mode) + reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode) ? UNITS_PER_VSX_WORD : UNITS_PER_FP_WORD); @@ -1546,16 +1669,38 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) { int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; + /* PTImode can only go in GPRs. Quad word memory operations require even/odd + register combinations, and use PTImode where we need to deal with quad + word memory operations. Don't allow quad words in the argument or frame + pointer registers, just registers 0..31. */ + if (mode == PTImode) + return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && ((regno & 1) == 0)); + /* VSX registers that overlap the FPR registers are larger than for non-VSX implementations. Don't allow an item to be split between a FP register - and an Altivec register. */ - if (VECTOR_MEM_VSX_P (mode)) + and an Altivec register. Allow TImode in all VSX registers if the user + asked for it. */ + if (TARGET_VSX && VSX_REGNO_P (regno) + && (VECTOR_MEM_VSX_P (mode) + || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode) + || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode)) + || (TARGET_VSX_TIMODE && mode == TImode))) { if (FP_REGNO_P (regno)) return FP_REGNO_P (last_regno); if (ALTIVEC_REGNO_P (regno)) - return ALTIVEC_REGNO_P (last_regno); + { + if (mode == SFmode && !TARGET_UPPER_REGS_SF) + return 0; + + if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF) + return 0; + + return ALTIVEC_REGNO_P (last_regno); + } } /* The GPRs can hold any mode, but values bigger than one register @@ -1564,8 +1709,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) return INT_REGNO_P (last_regno); /* The float registers (except for VSX vector modes) can only hold floating - modes and DImode. This excludes the 32-bit decimal float mode for - now. */ + modes and DImode. 
*/ if (FP_REGNO_P (regno)) { if (SCALAR_FLOAT_MODE_P (mode) @@ -1599,9 +1743,8 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) return 1; - /* We cannot put TImode anywhere except general register and it must be able - to fit within the register set. In the future, allow TImode in the - Altivec or VSX registers. */ + /* We cannot put non-VSX TImode or PTImode anywhere except general register + and it must be able to fit within the register set. */ return GET_MODE_SIZE (mode) <= UNITS_PER_WORD; } @@ -1668,6 +1811,16 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) comma = ", "; } + if (len > 70) + { + fprintf (stderr, ",\n\t"); + comma = ""; + } + + len += fprintf (stderr, "%sreg-class = %s", comma, + reg_class_names[(int)rs6000_regno_regclass[r]]); + comma = ", "; + if (len > 70) { fprintf (stderr, ",\n\t"); @@ -1678,6 +1831,63 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) } } +static const char * +rs6000_debug_vector_unit (enum rs6000_vector v) +{ + const char *ret; + + switch (v) + { + case VECTOR_NONE: ret = "none"; break; + case VECTOR_ALTIVEC: ret = "altivec"; break; + case VECTOR_VSX: ret = "vsx"; break; + case VECTOR_P8_VECTOR: ret = "p8_vector"; break; + case VECTOR_PAIRED: ret = "paired"; break; + case VECTOR_SPE: ret = "spe"; break; + case VECTOR_OTHER: ret = "other"; break; + default: ret = "unknown"; break; + } + + return ret; +} + +/* Print the address masks in a human readble fashion. */ +DEBUG_FUNCTION void +rs6000_debug_print_mode (ssize_t m) +{ + ssize_t rc; + + fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m)); + for (rc = 0; rc < N_RELOAD_REG; rc++) + { + addr_mask_type mask = reg_addr[m].addr_mask[rc]; + fprintf (stderr, + " %s: %c%c%c%c%c%c", + reload_reg_map[rc].name, + (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ', + (mask & RELOAD_REG_MULTIPLE) != 0 ? 
'm' : ' ', + (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ', + (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ', + (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ', + (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' '); + } + + if (rs6000_vector_unit[m] != VECTOR_NONE + || rs6000_vector_mem[m] != VECTOR_NONE + || (reg_addr[m].reload_store != CODE_FOR_nothing) + || (reg_addr[m].reload_load != CODE_FOR_nothing)) + { + fprintf (stderr, + " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c", + rs6000_debug_vector_unit (rs6000_vector_unit[m]), + rs6000_debug_vector_unit (rs6000_vector_mem[m]), + (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*', + (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*'); + } + + fputs ("\n", stderr); +} + #define DEBUG_FMT_ID "%-32s= " #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n" #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: " @@ -1690,6 +1900,7 @@ rs6000_debug_reg_global (void) static const char *const tf[2] = { "false", "true" }; const char *nl = (const char *)0; int m; + size_t m1, m2, v; char costly_num[20]; char nop_num[20]; char flags_buffer[40]; @@ -1700,20 +1911,67 @@ rs6000_debug_reg_global (void) const char *cmodel_str; struct cl_target_option cl_opts; - /* Map enum rs6000_vector to string. */ - static const char *rs6000_debug_vector_unit[] = { - "none", - "altivec", - "vsx", - "paired", - "spe", - "other" + /* Modes we want tieable information on. */ + static const enum machine_mode print_tieable_modes[] = { + QImode, + HImode, + SImode, + DImode, + TImode, + PTImode, + SFmode, + DFmode, + TFmode, + SDmode, + DDmode, + TDmode, + V8QImode, + V4HImode, + V2SImode, + V16QImode, + V8HImode, + V4SImode, + V2DImode, + V32QImode, + V16HImode, + V8SImode, + V4DImode, + V2SFmode, + V4SFmode, + V2DFmode, + V8SFmode, + V4DFmode, + CCmode, + CCUNSmode, + CCEQmode, + }; + + /* Virtual regs we are interested in. */ + const static struct { + int regno; /* register number. */ + const char *name; /* register name. 
*/ + } virtual_regs[] = { + { STACK_POINTER_REGNUM, "stack pointer:" }, + { TOC_REGNUM, "toc: " }, + { STATIC_CHAIN_REGNUM, "static chain: " }, + { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " }, + { HARD_FRAME_POINTER_REGNUM, "hard frame: " }, + { ARG_POINTER_REGNUM, "arg pointer: " }, + { FRAME_POINTER_REGNUM, "frame pointer:" }, + { FIRST_PSEUDO_REGISTER, "first pseudo: " }, + { FIRST_VIRTUAL_REGISTER, "first virtual:" }, + { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" }, + { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " }, + { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" }, + { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" }, + { VIRTUAL_CFA_REGNUM, "cfa (frame): " }, + { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" }, + { LAST_VIRTUAL_REGISTER, "last virtual: " }, }; - fprintf (stderr, "Register information: (last virtual reg = %d)\n", - LAST_VIRTUAL_REGISTER); - rs6000_debug_reg_print (0, 31, "gr"); - rs6000_debug_reg_print (32, 63, "fp"); + fputs ("\nHard register information:\n", stderr); + rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr"); + rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp"); rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO, "vs"); @@ -1726,6 +1984,10 @@ rs6000_debug_reg_global (void) rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a"); rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f"); + fputs ("\nVirtual/stack/frame registers:\n", stderr); + for (v = 0; v < ARRAY_SIZE (virtual_regs); v++) + fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno); + fprintf (stderr, "\n" "d reg_class = %s\n" @@ -1734,24 +1996,69 @@ rs6000_debug_reg_global (void) "wa reg_class = %s\n" "wd reg_class = %s\n" "wf reg_class = %s\n" - "ws reg_class = %s\n\n", + "wg reg_class = %s\n" + "wl reg_class = %s\n" + "wm reg_class = %s\n" + "wr reg_class = %s\n" + "ws reg_class = %s\n" + "wt reg_class = %s\n" + "wu reg_class = %s\n" + "wv reg_class = %s\n" + "ww 
reg_class = %s\n" + "wx reg_class = %s\n" + "wy reg_class = %s\n" + "wz reg_class = %s\n" + "\n", reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]], - reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]]); - + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]); + + nl = "\n"; for (m = 0; m < NUM_MACHINE_MODES; ++m) - if (rs6000_vector_unit[m] || rs6000_vector_mem[m]) - { - nl = "\n"; - fprintf (stderr, "Vector mode: %-5s arithmetic: %-8s move: %-8s\n", - GET_MODE_NAME (m), - rs6000_debug_vector_unit[ rs6000_vector_unit[m] ], - rs6000_debug_vector_unit[ rs6000_vector_mem[m] ]); - } + rs6000_debug_print_mode (m); + + fputs ("\n", stderr); + + for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++) + { + enum machine_mode mode1 = print_tieable_modes[m1]; + bool first_time = true; + + nl = (const char *)0; + for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++) + { + enum machine_mode mode2 = print_tieable_modes[m2]; + if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2)) + { + if (first_time) + { 
+ fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1)); + nl = "\n"; + first_time = false; + } + + fprintf (stderr, " %s", GET_MODE_NAME (mode2)); + } + } + + if (!first_time) + fputs ("\n", stderr); + } if (nl) fputs (nl, stderr); @@ -1935,6 +2242,13 @@ rs6000_debug_reg_global (void) if (TARGET_LINK_STACK) fprintf (stderr, DEBUG_FMT_S, "link_stack", "true"); + if (targetm.lra_p ()) + fprintf (stderr, DEBUG_FMT_S, "lra", "true"); + + if (TARGET_P8_FUSION) + fprintf (stderr, DEBUG_FMT_S, "p8 fusion", + (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero"); + fprintf (stderr, DEBUG_FMT_S, "plt-format", TARGET_SECURE_PLT ? "secure" : "bss"); fprintf (stderr, DEBUG_FMT_S, "struct-return", @@ -1954,11 +2268,106 @@ rs6000_debug_reg_global (void) (int)RS6000_BUILTIN_COUNT); } + +/* Update the addr mask bits in reg_addr to help secondary reload and go if + legitimate address support to figure out the appropriate addressing to + use. */ + +static void +rs6000_setup_reg_addr_masks (void) +{ + ssize_t rc, reg, m, nregs; + addr_mask_type any_addr_mask, addr_mask; + + for (m = 0; m < NUM_MACHINE_MODES; ++m) + { + /* SDmode is special in that we want to access it only via REG+REG + addressing on power7 and above, since we want to use the LFIWZX and + STFIWZX instructions to load it. */ + bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); + + any_addr_mask = 0; + for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) + { + addr_mask = 0; + reg = reload_reg_map[rc].reg; + + /* Can mode values go in the GPR/FPR/Altivec registers? */ + if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg]) + { + nregs = rs6000_hard_regno_nregs[m][reg]; + addr_mask |= RELOAD_REG_VALID; + + /* Indicate if the mode takes more than 1 physical register. If + it takes a single register, indicate it can do REG+REG + addressing. 
*/ + if (nregs > 1 || m == BLKmode) + addr_mask |= RELOAD_REG_MULTIPLE; + else + addr_mask |= RELOAD_REG_INDEXED; + + /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY + addressing. Restrict addressing on SPE for 64-bit types + because of the SUBREG hackery used to address 64-bit floats in + '32-bit' GPRs. To simplify secondary reload, don't allow + update forms on scalar floating point types that can go in the + upper registers. */ + + if (TARGET_UPDATE + && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR) + && GET_MODE_SIZE (m) <= 8 + && !VECTOR_MODE_P (m) + && !COMPLEX_MODE_P (m) + && !indexed_only_p + && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m) == 8) + && !(m == DFmode && TARGET_UPPER_REGS_DF) + && !(m == SFmode && TARGET_UPPER_REGS_SF)) + { + addr_mask |= RELOAD_REG_PRE_INCDEC; + + /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that + we don't allow PRE_MODIFY for some multi-register + operations. */ + switch (m) + { + default: + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + + case DImode: + if (TARGET_POWERPC64) + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + + case DFmode: + case DDmode: + if (TARGET_DF_INSN) + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + } + } + } + + /* GPR and FPR registers can do REG+OFFSET addressing, except + possibly for SDmode. */ + if ((addr_mask != 0) && !indexed_only_p + && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)) + addr_mask |= RELOAD_REG_OFFSET; + + reg_addr[m].addr_mask[rc] = addr_mask; + any_addr_mask |= addr_mask; + } + + reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask; + } +} + + /* Initialize the various global tables that are based on register size. 
*/ static void rs6000_init_hard_regno_mode_ok (bool global_init_p) { - int r, m, c; + ssize_t r, m, c; int align64; int align32; @@ -1990,17 +2399,48 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; - /* Precalculate vector information, this must be set up before the - rs6000_hard_regno_nregs_internal below. */ - for (m = 0; m < NUM_MACHINE_MODES; ++m) + /* Precalculate register class to simpler reload register class. We don't + need all of the register classes that are combinations of different + classes, just the simple ones that have constraint letters. */ + for (c = 0; c < N_REG_CLASSES; c++) + reg_class_to_reg_type[c] = NO_REG_TYPE; + + reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE; + reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE; + + if (TARGET_VSX) + { + reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; + } + else { - rs6000_vector_unit[m] = rs6000_vector_mem[m] = VECTOR_NONE; - rs6000_vector_reload[m][0] = CODE_FOR_nothing; - rs6000_vector_reload[m][1] = CODE_FOR_nothing; + reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; } - for (c = 0; c < (int)(int)RS6000_CONSTRAINT_MAX; c++) - rs6000_constraints[c] = NO_REGS; + /* Precalculate 
the valid memory formats as well as the vector information, + this must be set up before the rs6000_hard_regno_nregs_internal calls + below. */ + gcc_assert ((int)VECTOR_NONE == 0); + memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit)); + memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit)); + + gcc_assert ((int)CODE_FOR_nothing == 0); + memset ((void *) ®_addr[0], '\0', sizeof (reg_addr)); + + gcc_assert ((int)NO_REGS == 0); + memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints)); /* The VSX hardware allows native alignment for vectors, but control whether the compiler believes it can use native alignment or still uses 128-bit alignment. */ @@ -2062,12 +2502,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) } } - /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract. - Altivec doesn't have 64-bit support. */ + /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to + do insert/splat/extract. Altivec doesn't have 64-bit integer support. */ if (TARGET_VSX) { rs6000_vector_mem[V2DImode] = VECTOR_VSX; - rs6000_vector_unit[V2DImode] = VECTOR_NONE; + rs6000_vector_unit[V2DImode] + = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; rs6000_vector_align[V2DImode] = align64; } @@ -2076,14 +2517,48 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) { rs6000_vector_unit[DFmode] = VECTOR_VSX; rs6000_vector_mem[DFmode] - = (TARGET_VSX_SCALAR_MEMORY ? VECTOR_VSX : VECTOR_NONE); + = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE); rs6000_vector_align[DFmode] = align64; } + /* Allow TImode in VSX register and set the VSX memory macros. */ + if (TARGET_VSX && TARGET_VSX_TIMODE) + { + rs6000_vector_mem[TImode] = VECTOR_VSX; + rs6000_vector_align[TImode] = align64; + } + /* TODO add SPE and paired floating point vector support. */ /* Register class constraints for the constraints that depend on compile - switches. */ + switches. 
When the VSX code was added, different constraints were added + based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all + of the VSX registers are used. The register classes for scalar floating + point types is set, based on whether we allow that type into the upper + (Altivec) registers. GCC has register classes to target the Altivec + registers for load/store operations, to select using a VSX memory + operation instead of the traditional floating point operation. The + constraints are: + + d - Register class to use with traditional DFmode instructions. + f - Register class to use with traditional SFmode instructions. + v - Altivec register. + wa - Any VSX register. + wd - Preferred register class for V2DFmode. + wf - Preferred register class for V4SFmode. + wg - Float register for power6x move insns. + wl - Float register if we can do 32-bit signed int loads. + wm - VSX register for ISA 2.07 direct move operations. + wr - GPR if 64-bit mode is permitted. + ws - Register class to do ISA 2.06 DF operations. + wu - Altivec register for ISA 2.07 VSX SF/SI load/stores. + wv - Altivec register for ISA 2.06 VSX DF/DI load/stores. + wt - VSX register for TImode in VSX registers. + ww - Register class to do SF conversions in with VSX operations. + wx - Float register if we can do 32-bit int stores. + wy - Register class to do ISA 2.07 SF operations. + wz - Float register if we can do 32-bit unsigned int loads. */ + if (TARGET_HARD_FLOAT && TARGET_FPRS) rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; @@ -2092,63 +2567,157 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_VSX) { - /* At present, we just use VSX_REGS, but we have different constraints - based on the use, in case we want to fine tune the default register - class used. wa = any VSX register, wf = register class to use for - V4SF, wd = register class to use for V2DF, and ws = register classs to - use for DF scalars. 
*/ rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; - rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; - rs6000_constraints[RS6000_CONSTRAINT_ws] = (TARGET_VSX_SCALAR_MEMORY - ? VSX_REGS - : FLOAT_REGS); + rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; + + if (TARGET_VSX_TIMODE) + rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; + + if (TARGET_UPPER_REGS_DF) + { + rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; + } + else + rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS; } + /* Add conditional constraints based on various options, to allow us to + collapse multiple insn patterns. */ if (TARGET_ALTIVEC) rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS; - /* Set up the reload helper functions. */ + if (TARGET_MFPGPR) + rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS; + + if (TARGET_LFIWAX) + rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; + + if (TARGET_DIRECT_MOVE) + rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS; + + if (TARGET_POWERPC64) + rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; + + if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) + { + rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS; + } + else if (TARGET_P8_VECTOR) + { + rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS; + rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; + } + else if (TARGET_VSX) + rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; + + if (TARGET_STFIWX) + rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; + + if (TARGET_LFIWZX) + rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; + + /* Set up the reload helper and direct move functions. 
*/ if (TARGET_VSX || TARGET_ALTIVEC) { if (TARGET_64BIT) { - rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_di_store; - rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_di_load; - rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_di_store; - rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_di_load; - rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_di_store; - rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_di_load; - rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_di_store; - rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_di_load; - rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_di_store; - rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_di_load; - rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_di_store; - rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_di_load; - if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY) + reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store; + reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; + reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; + reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; + reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; + reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; + reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; + reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load; + reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store; + reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load; + reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store; + reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load; + if (TARGET_VSX && TARGET_UPPER_REGS_DF) + { + reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store; + reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load; + reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store; + reg_addr[DDmode].reload_load = 
CODE_FOR_reload_dd_di_load; + } + if (TARGET_P8_VECTOR) { - rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_di_store; - rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_di_load; + reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store; + reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load; + reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store; + reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load; + } + if (TARGET_VSX_TIMODE) + { + reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store; + reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load; + } + if (TARGET_DIRECT_MOVE) + { + if (TARGET_POWERPC64) + { + reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; + reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; + reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; + reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; + reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; + reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; + reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; + reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; + + reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; + reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; + reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; + reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; + reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; + reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; + reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; + reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; + } + else + { + reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi; + reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd; + reg_addr[DFmode].reload_fpr_gpr = 
CODE_FOR_reload_fpr_from_gprdf; + } } } else { - rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_si_store; - rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_si_load; - rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_si_store; - rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_si_load; - rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_si_store; - rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_si_load; - rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_si_store; - rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_si_load; - rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_si_store; - rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_si_load; - rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_si_store; - rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_si_load; - if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY) + reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store; + reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load; + reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store; + reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load; + reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store; + reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load; + reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store; + reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; + reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; + reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; + reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; + reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load; + if (TARGET_VSX && TARGET_UPPER_REGS_DF) + { + reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store; + reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load; + reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store; + reg_addr[DDmode].reload_load = 
CODE_FOR_reload_dd_si_load; + } + if (TARGET_P8_VECTOR) + { + reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store; + reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load; + reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store; + reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load; + } + if (TARGET_VSX_TIMODE) { - rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_si_store; - rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_si_load; + reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store; + reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load; } } } @@ -2267,6 +2836,11 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) } } + /* Update the addr mask bits in reg_addr to help secondary reload and go if + legitimate address support to figure out the appropriate addressing to + use. */ + rs6000_setup_reg_addr_masks (); + if (global_init_p || TARGET_DEBUG_TARGET) { if (TARGET_DEBUG_REG) @@ -2369,16 +2943,18 @@ darwin_rs6000_override_options (void) HOST_WIDE_INT rs6000_builtin_mask_calculate (void) { - return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) - | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) - | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) - | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) - | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) - | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) - | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) - | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) - | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) - | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)); + return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) + | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) + | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) + | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) + | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) + | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) + | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) + | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) + | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) + | ((rs6000_cpu == PROCESSOR_CELL) ? 
RS6000_BTM_CELL : 0) + | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) + | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)); } /* Override command line options. Mostly we process the processor type and @@ -2619,7 +3195,10 @@ rs6000_option_override_internal (bool global_init_p) if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) msg = N_("-mvsx requires hardware floating point"); else - rs6000_isa_flags &= ~ OPTION_MASK_VSX; + { + rs6000_isa_flags &= ~ OPTION_MASK_VSX; + rs6000_isa_flags_explicit |= OPTION_MASK_VSX; + } } else if (TARGET_PAIRED_FLOAT) msg = N_("-mvsx and -mpaired are incompatible"); @@ -2647,9 +3226,24 @@ rs6000_option_override_internal (bool global_init_p) } } + /* If hard-float/altivec/vsx were explicitly turned off then don't allow + the -mcpu setting to enable options that conflict. */ + if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX) + && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT + | OPTION_MASK_ALTIVEC + | OPTION_MASK_VSX)) != 0) + rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO + | OPTION_MASK_DIRECT_MOVE) + & ~rs6000_isa_flags_explicit); + + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) + rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags); + /* For the newer switches (vsx, dfp, etc.) set some of the older options, unless the user explicitly used the -mno-