#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
-#include "gimple-ssa.h"
#include "gimple-walk.h"
+#include "ssa.h"
+#include "tree-vectorizer.h"
+#include "tree-ssa-propagate.h"
#include "intl.h"
#include "tm-constrs.h"
-#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
-#include "tree-ssa-propagate.h"
-#include "tree-vrp.h"
-#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"
CLONE_ISA_2_05, /* ISA 2.05 (power6). */
CLONE_ISA_2_06, /* ISA 2.06 (power7). */
CLONE_ISA_2_07, /* ISA 2.07 (power8). */
- CLONE_ISA_3_00, /* ISA 3.00 (power9). */
+ CLONE_ISA_3_00, /* ISA 3.0 (power9). */
+ CLONE_ISA_3_1, /* ISA 3.1 (power10). */
CLONE_MAX
};
{ OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
{ OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
{ OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
- { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.00 (power9). */
+ { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */
+ { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */
};
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
+#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
+#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
+
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
+#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
+#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
+
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
rs6000_cannot_substitute_mem_equiv_p
+
+#undef TARGET_INVALID_CONVERSION
+#define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
\f
/* Processor table. */
128-bit floating point that can go in vector registers, which has VSX
memory addressing. */
if (FP_REGNO_P (regno))
- reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
+ reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
? UNITS_PER_VSX_WORD
: UNITS_PER_FP_WORD);
if (COMPLEX_MODE_P (mode))
mode = GET_MODE_INNER (mode);
+ /* Vector pair modes need even/odd VSX register pairs. Only allow vector
+ registers. */
+ if (mode == OOmode)
+ return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
+
+ /* MMA accumulator modes need FPR registers divisible by 4. */
+ if (mode == XOmode)
+ return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
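+
+  /* For example, an OOmode pair may start in vs0 or vs2 but not vs1, and an
+     XOmode accumulator may overlay FPRs 0-3 or 4-7 but not 2-5.  */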
+
/* PTImode can only go in GPRs. Quad word memory operations require even/odd
register combinations, and use PTImode where we need to deal with quad
word memory operations. Don't allow quad words in the argument or frame
asked for it. */
if (TARGET_VSX && VSX_REGNO_P (regno)
&& (VECTOR_MEM_VSX_P (mode)
- || FLOAT128_VECTOR_P (mode)
+ || VECTOR_ALIGNMENT_P (mode)
|| reg_addr[mode].scalar_in_vmx_p
|| mode == TImode
|| (TARGET_VADDUQM && mode == V1TImode)))
if (ALTIVEC_REGNO_P (regno))
{
- if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
+ if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
return 0;
return ALTIVEC_REGNO_P (last_regno);
modes and DImode. */
if (FP_REGNO_P (regno))
{
- if (FLOAT128_VECTOR_P (mode))
+ if (VECTOR_ALIGNMENT_P (mode))
return false;
if (SCALAR_FLOAT_MODE_P (mode)
GPR registers, and TImode can go in any GPR as well as VSX registers (PR
57744).
+ Similarly, don't allow OOmode (vector pair, restricted to even VSX
+ registers) or XOmode (vector quad, restricted to FPR registers divisible
+ by 4) to tie with other modes.
+
Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
128-bit floating point on VSX systems ties with other vectors. */
static bool
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
- if (mode1 == PTImode)
- return mode2 == PTImode;
- if (mode2 == PTImode)
- return false;
+ if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
+ || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
+ return mode1 == mode2;
if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
SDmode,
DDmode,
TDmode,
+ V2SImode,
+ V2SFmode,
V16QImode,
V8HImode,
V4SImode,
V2DFmode,
V8SFmode,
V4DFmode,
+ OOmode,
+ XOmode,
CCmode,
CCUNSmode,
CCEQmode,
+ CCFPmode,
};
/* Virtual regs we are interested in. */
else
fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
- cl_target_option_save (&cl_opts, &global_options);
+ cl_target_option_save (&cl_opts, &global_options, &global_options_set);
rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
rs6000_isa_flags);
&& (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
&& msize <= 8
&& !VECTOR_MODE_P (m2)
- && !FLOAT128_VECTOR_P (m2)
+ && !VECTOR_ALIGNMENT_P (m2)
&& !complex_p
&& (m != E_DFmode || !TARGET_VSX)
&& (m != E_SFmode || !TARGET_P8_VECTOR)
addr_mask |= RELOAD_REG_QUAD_OFFSET;
}
+	  /* Vector pairs can do both indexed and offset loads if the
+	     instructions are enabled; otherwise they can only do offset
+	     loads, since the access will be broken into two vector moves.
+	     Vector quads can only do offset loads. */
+ else if ((addr_mask != 0) && TARGET_MMA
+ && (m2 == OOmode || m2 == XOmode))
+ {
+ addr_mask |= RELOAD_REG_OFFSET;
+ if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
+ {
+ addr_mask |= RELOAD_REG_QUAD_OFFSET;
+ if (m2 == OOmode)
+ addr_mask |= RELOAD_REG_INDEXED;
+ }
+ }
+
/* VMX registers can do (REG & -16) and ((REG+REG) & -16)
addressing on 128-bit types. */
if (rc == RELOAD_REG_VMX && msize == 16
rs6000_vector_align[TImode] = align64;
}
+ /* Add support for vector pairs and vector quad registers. */
+ if (TARGET_MMA)
+ {
+ rs6000_vector_unit[OOmode] = VECTOR_NONE;
+ rs6000_vector_mem[OOmode] = VECTOR_VSX;
+ rs6000_vector_align[OOmode] = 256;
+
+ rs6000_vector_unit[XOmode] = VECTOR_NONE;
+ rs6000_vector_mem[XOmode] = VECTOR_VSX;
+ rs6000_vector_align[XOmode] = 512;
+ }
+
/* Register class constraints for the constraints that depend on compile
switches. When the VSX code was added, different constraints were added
based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
}
+
+ if (TARGET_MMA)
+ {
+ reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
+ reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
+ reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
+ reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
+ }
}
}
else
&& !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
| ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
| ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
- | ((TARGET_FUTURE) ? RS6000_BTM_FUTURE : 0));
+ | ((TARGET_MMA) ? RS6000_BTM_MMA : 0)
+ | ((TARGET_POWER10) ? RS6000_BTM_P10 : 0));
}
/* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
return NULL;
}
+/* This target function is similar to the hook TARGET_OPTION_OVERRIDE
+ but is called when the optimize level is changed via an attribute or
+ pragma or when it is reset at the end of the code affected by the
+ attribute or pragma. It is not called at the beginning of compilation
+   when TARGET_OPTION_OVERRIDE is called, so if you want to perform these
+   actions then as well, you should have TARGET_OPTION_OVERRIDE call
+ TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
+
+static void
+rs6000_override_options_after_change (void)
+{
+ /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
+ turns -frename-registers on. */
+ if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
+ || (global_options_set.x_flag_unroll_all_loops
+ && flag_unroll_all_loops))
+ {
+ if (!global_options_set.x_unroll_only_small_loops)
+ unroll_only_small_loops = 0;
+ if (!global_options_set.x_flag_rename_registers)
+ flag_rename_registers = 1;
+ if (!global_options_set.x_flag_cunroll_grow_size)
+ flag_cunroll_grow_size = 1;
+ }
+ else if (!global_options_set.x_flag_cunroll_grow_size)
+ flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
+}
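+/* For example, under the logic above, "gcc -O2 -funroll-loops" also enables
+   -frename-registers and -fcunroll-grow-size unless the user specified those
+   options explicitly.  */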
+
+#ifdef TARGET_USES_LINUX64_OPT
+static void
+rs6000_linux64_override_options ()
+{
+ if (!global_options_set.x_rs6000_alignment_flags)
+ rs6000_alignment_flags = MASK_ALIGN_NATURAL;
+ if (rs6000_isa_flags & OPTION_MASK_64BIT)
+ {
+ if (DEFAULT_ABI != ABI_AIX)
+ {
+ rs6000_current_abi = ABI_AIX;
+ error (INVALID_64BIT, "call");
+ }
+ dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
+ if (ELFv2_ABI_CHECK)
+ {
+ rs6000_current_abi = ABI_ELFv2;
+ if (dot_symbols)
+ error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
+ }
+ if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
+ {
+ rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
+ error (INVALID_64BIT, "relocatable");
+ }
+ if (rs6000_isa_flags & OPTION_MASK_EABI)
+ {
+ rs6000_isa_flags &= ~OPTION_MASK_EABI;
+ error (INVALID_64BIT, "eabi");
+ }
+ if (TARGET_PROTOTYPE)
+ {
+ target_prototype = 0;
+ error (INVALID_64BIT, "prototype");
+ }
+ if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
+ {
+ rs6000_isa_flags |= OPTION_MASK_POWERPC64;
+ error ("%<-m64%> requires a PowerPC64 cpu");
+ }
+ if (!global_options_set.x_rs6000_current_cmodel)
+ SET_CMODEL (CMODEL_MEDIUM);
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
+ {
+ if (global_options_set.x_rs6000_current_cmodel
+ && rs6000_current_cmodel != CMODEL_SMALL)
+	  error ("%<-mcmodel%> incompatible with other toc options");
+ if (TARGET_MINIMAL_TOC)
+ SET_CMODEL (CMODEL_SMALL);
+ else if (TARGET_PCREL
+ || (PCREL_SUPPORTED_BY_OS
+ && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
+ /* Ignore -mno-minimal-toc. */
+ ;
+ else
+ SET_CMODEL (CMODEL_SMALL);
+ }
+ if (rs6000_current_cmodel != CMODEL_SMALL)
+ {
+ if (!global_options_set.x_TARGET_NO_FP_IN_TOC)
+ TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
+ if (!global_options_set.x_TARGET_NO_SUM_IN_TOC)
+ TARGET_NO_SUM_IN_TOC = 0;
+ }
+ if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
+ {
+ if (global_options_set.x_rs6000_pltseq)
+ warning (0, "%qs unsupported for this ABI",
+ "-mpltseq");
+ rs6000_pltseq = false;
+ }
+ }
+ else if (TARGET_64BIT)
+ error (INVALID_32BIT, "32");
+ else
+ {
+ if (TARGET_PROFILE_KERNEL)
+ {
+ profile_kernel = 0;
+ error (INVALID_32BIT, "profile-kernel");
+ }
+ if (global_options_set.x_rs6000_current_cmodel)
+ {
+ SET_CMODEL (CMODEL_SMALL);
+ error (INVALID_32BIT, "cmodel");
+ }
+ }
+}
+#endif
+
+/* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
+   This support is only in little endian GLIBC 2.32 or newer (the check below
+   encodes the version as major * 1000 + minor, so 2.32 becomes 2032).  */
+static bool
+glibc_supports_ieee_128bit (void)
+{
+#ifdef OPTION_GLIBC
+ if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
+ && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
+ return true;
+#endif /* OPTION_GLIBC. */
+
+ return false;
+}
+
/* Override command line options.
Combine build-specific configuration information with options
}
}
+ if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
+ {
+ if (TARGET_EFFICIENT_UNALIGNED_VSX)
+ rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
+ else
+ rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
+ }
+
+ if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
+ {
+ if (TARGET_MMA && TARGET_EFFICIENT_UNALIGNED_VSX)
+ rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+ else
+ rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+ }
+
/* Use long double size to select the appropriate long double. We use
TYPE_PRECISION to differentiate the 3 different long double types. We map
128 into the precision used for TFmode. */
if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
{
+ /* Determine if the user can change the default long double type at
+ compilation time. Only C and C++ support this, and you need GLIBC
+ 2.32 or newer. Only issue one warning. */
static bool warned_change_long_double;
- if (!warned_change_long_double)
+
+ if (!warned_change_long_double
+ && (!glibc_supports_ieee_128bit ()
+ || (!lang_GNU_C () && !lang_GNU_CXX ())))
{
warned_change_long_double = true;
if (TARGET_IEEEQUAD)
rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
}
- /* Enable -mprefixed by default on 'future' systems. */
- if (TARGET_FUTURE && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
+ /* Enable -mprefixed by default on power10 systems. */
+  if (TARGET_POWER10
+      && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
rs6000_isa_flags |= OPTION_MASK_PREFIXED;
- /* -mprefixed requires -mcpu=future. */
- else if (TARGET_PREFIXED && !TARGET_FUTURE)
+ /* -mprefixed requires -mcpu=power10 (or later). */
+ else if (TARGET_PREFIXED && !TARGET_POWER10)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
- error ("%qs requires %qs", "-mprefixed", "-mcpu=future");
+ error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
}
}
/* Enable Altivec ABI for AIX -maltivec. */
- if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
+ if (TARGET_XCOFF
+ && (TARGET_ALTIVEC || TARGET_VSX)
+ && !global_options_set.x_rs6000_altivec_abi)
{
if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
error ("target attribute or pragma changes AltiVec ABI");
rs6000_isa_flags &= ~OPTION_MASK_PCREL;
}
+ /* Enable -mmma by default on power10 systems. */
+ if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
+ rs6000_isa_flags |= OPTION_MASK_MMA;
+
+ /* Turn off vector pair/mma options on non-power10 systems. */
+ else if (!TARGET_POWER10 && TARGET_MMA)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
+ error ("%qs requires %qs", "-mmma", "-mcpu=power10");
+
+ rs6000_isa_flags &= ~OPTION_MASK_MMA;
+ }
+
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
&& rs6000_tune != PROCESSOR_POWER7
&& rs6000_tune != PROCESSOR_POWER8
&& rs6000_tune != PROCESSOR_POWER9
- && rs6000_tune != PROCESSOR_FUTURE
+ && rs6000_tune != PROCESSOR_POWER10
&& rs6000_tune != PROCESSOR_PPCA2
&& rs6000_tune != PROCESSOR_CELL
&& rs6000_tune != PROCESSOR_PPC476);
|| rs6000_tune == PROCESSOR_POWER7
|| rs6000_tune == PROCESSOR_POWER8
|| rs6000_tune == PROCESSOR_POWER9
- || rs6000_tune == PROCESSOR_FUTURE
+ || rs6000_tune == PROCESSOR_POWER10
|| rs6000_tune == PROCESSOR_PPCE500MC
|| rs6000_tune == PROCESSOR_PPCE500MC64
|| rs6000_tune == PROCESSOR_PPCE5500
break;
case PROCESSOR_POWER9:
- case PROCESSOR_FUTURE:
+ case PROCESSOR_POWER10:
rs6000_cost = &power9_cost;
break;
SET_OPTION_IF_UNSET (&global_options, &global_options_set,
param_max_completely_peeled_insns, 400);
+ /* The lxvl/stxvl instructions don't perform well before Power10. */
+ if (TARGET_POWER10)
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_vect_partial_vector_usage, 1);
+ else
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_vect_partial_vector_usage, 0);
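+  /* (A vect-partial-vector-usage value of 1 restricts partial vectors to
+     loop epilogues only, while 0 disables them entirely.)  */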
+
/* Use the 'model' -fsched-pressure algorithm by default. */
SET_OPTION_IF_UNSET (&global_options, &global_options_set,
param_sched_pressure_algorithm,
SCHED_PRESSURE_MODEL);
- /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
- turns -frename-registers on. */
- if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
- || (global_options_set.x_flag_unroll_all_loops
- && flag_unroll_all_loops))
- {
- if (!global_options_set.x_unroll_only_small_loops)
- unroll_only_small_loops = 0;
- if (!global_options_set.x_flag_rename_registers)
- flag_rename_registers = 1;
- }
-
/* If using typedef char *va_list, signal that
__builtin_va_start (&ap, 0) can be optimized to
ap = __builtin_next_arg (0). */
targetm.expand_builtin_va_start = NULL;
}
+ rs6000_override_options_after_change ();
+
/* If not explicitly specified via option, decide whether to generate indexed
load/store instructions. A value of -1 indicates that the
initial value of this variable has not been overwritten. During
/* Save the initial options in case the user does function specific options */
if (global_init_p)
target_option_default_node = target_option_current_node
- = build_target_option_node (&global_options);
+ = build_target_option_node (&global_options, &global_options_set);
/* If not explicitly specified via option, decide whether to generate the
extra blr's required to preserve the link stack on some cpus (eg, 476). */
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
+ if (is_gimple_debug (stmt))
+ continue;
+
stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
if (!STMT_VINFO_RELEVANT_P (stmt_info)
compare + branch or compare + isel instructions. */
static unsigned
-adjust_vectorization_cost (enum vect_cost_for_stmt kind,
- struct _stmt_vec_info *stmt_info)
+rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info)
{
if (kind == scalar_stmt && stmt_info && stmt_info->stmt
&& gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
{
int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
misalign);
- stmt_cost += adjust_vectorization_cost (kind, stmt_info);
+ stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
/* Statements in an inner loop relative to the loop being
vectorized are weighted more heavily. The value here is
arbitrary and could potentially be improved with analysis. */
return retval;
}
+/* For target-specific vectorization costs that can't be handled per stmt,
+   check the requisite conditions and adjust the vectorization cost
+   accordingly if they are satisfied.  One typical example is to model the
+   shift cost for vectors with length by counting the number of required
+   lengths when LOOP_VINFO_FULLY_WITH_LENGTH_P holds.  */
+
+static void
+rs6000_adjust_vect_cost_per_loop (rs6000_cost_data *data)
+{
+ struct loop *loop = data->loop_info;
+ gcc_assert (loop);
+ loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
+
+ if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+ {
+ rgroup_controls *rgc;
+ unsigned int num_vectors_m1;
+ unsigned int shift_cnt = 0;
+ FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
+ if (rgc->type)
+ /* Each length needs one shift to fill into bits 0-7. */
+ shift_cnt += num_vectors_m1 + 1;
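+
+      /* E.g. two rgroups needing one and two vectors respectively would
+	 account for 1 + 2 = 3 scalar shifts here.  */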
+
+ rs6000_add_stmt_cost (loop_vinfo, (void *) data, shift_cnt, scalar_stmt,
+ NULL, NULL_TREE, 0, vect_body);
+ }
+}
+
/* Implement targetm.vectorize.finish_cost. */
static void
rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
if (cost_data->loop_info)
- rs6000_density_test (cost_data);
+ {
+ rs6000_adjust_vect_cost_per_loop (cost_data);
+ rs6000_density_test (cost_data);
+ }
/* Don't vectorize minimum-vectorization-factor, simple copy loops
that require versioning for any reason. The vectorization is at
{
if (unroll_only_small_loops)
{
- /* TODO: This is hardcoded to 10 right now. It can be refined, for
- example we may want to unroll very small loops more times (4 perhaps).
- We also should use a PARAM for this. */
+ /* TODO: These are hardcoded values right now. We probably should use
+ a PARAM here. */
+ if (loop->ninsns <= 6)
+ return MIN (4, nunroll);
if (loop->ninsns <= 10)
return MIN (2, nunroll);
- else
- return 0;
+
+ return 0;
}
return nunroll;
/* Disable the flags that should never influence the .machine selection. */
flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
- if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
- return "future";
+ if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
+ return "power10";
if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
return "power9";
if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
/* Helper for num_insns_constant. Calculate number of instructions to
load VALUE to a single gpr using combinations of addi, addis, ori,
- oris and sldi instructions. */
+ oris, sldi and rldimi instructions. */
static int
num_insns_constant_gpr (HOST_WIDE_INT value)
high >>= 1;
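+	      /* If the low and high halves are identical, we can load the
+		 high half and copy it into the low half with one rldimi,
+		 hence the low == high test below.  */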
- if (low == 0)
+ if (low == 0 || low == high)
return num_insns_constant_gpr (high) + 1;
else if (high == 0)
return num_insns_constant_gpr (low) + 1;
{
machine_mode mode = GET_MODE (target);
machine_mode inner_mode = GET_MODE_INNER (mode);
- int n_elts = GET_MODE_NUNITS (mode);
+ unsigned int n_elts = GET_MODE_NUNITS (mode);
int n_var = 0, one_var = -1;
bool all_same = true, all_const_zero = true;
rtx x, mem;
- int i;
+ unsigned int i;
for (i = 0; i < n_elts; ++i)
{
}
else
{
- rtx dbl_even = gen_reg_rtx (V2DFmode);
- rtx dbl_odd = gen_reg_rtx (V2DFmode);
- rtx flt_even = gen_reg_rtx (V4SFmode);
- rtx flt_odd = gen_reg_rtx (V4SFmode);
- rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
- rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
- rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
- rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
-
- /* Use VMRGEW if we can instead of doing a permute. */
- if (TARGET_P8_VECTOR)
+ if (TARGET_P8_VECTOR && TARGET_POWERPC64)
{
- emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
- emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
- emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
- emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
- else
- emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
+ rtx tmp_sf[4];
+ rtx tmp_si[4];
+ rtx tmp_di[4];
+ rtx mrg_di[4];
+ for (i = 0; i < 4; i++)
+ {
+ tmp_si[i] = gen_reg_rtx (SImode);
+ tmp_di[i] = gen_reg_rtx (DImode);
+ mrg_di[i] = gen_reg_rtx (DImode);
+ tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
+ emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
+ emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
+ }
+
+ if (!BYTES_BIG_ENDIAN)
+ {
+ std::swap (tmp_di[0], tmp_di[1]);
+ std::swap (tmp_di[2], tmp_di[3]);
+ }
+
+ emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
+ emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
+ emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
+ emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
+
+ rtx tmp_v2di = gen_reg_rtx (V2DImode);
+ emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
+ emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
}
else
{
+ rtx dbl_even = gen_reg_rtx (V2DFmode);
+ rtx dbl_odd = gen_reg_rtx (V2DFmode);
+ rtx flt_even = gen_reg_rtx (V4SFmode);
+ rtx flt_odd = gen_reg_rtx (V4SFmode);
+ rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
+ rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
+ rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
+ rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
+
emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
rs6000_expand_vector_init (target, copy);
/* Insert variable. */
- rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
+ rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
+ GEN_INT (one_var));
+ return;
+ }
+
+ if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
+ {
+ rtx op[16];
+ /* Force the values into word_mode registers. */
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx tmp = force_reg (GET_MODE_INNER (mode), XVECEXP (vals, 0, i));
+ if (TARGET_POWERPC64)
+ {
+ op[i] = gen_reg_rtx (DImode);
+ emit_insn (gen_zero_extendqidi2 (op[i], tmp));
+ }
+ else
+ {
+ op[i] = gen_reg_rtx (SImode);
+ emit_insn (gen_zero_extendqisi2 (op[i], tmp));
+ }
+ }
+
+      /* Take unsigned char on big endian 64-bit as an example for the
+	 construction below; the input values are: A, B, C, D, ..., O, P.  */
+
+ if (TARGET_DIRECT_MOVE_128)
+ {
+ /* Move to VSX register with vec_concat, each has 2 values.
+ eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
+ vr1[1] = { xxxxxxxC, xxxxxxxD };
+ ...
+ vr1[7] = { xxxxxxxO, xxxxxxxP }; */
+ rtx vr1[8];
+ for (i = 0; i < n_elts / 2; i++)
+ {
+ vr1[i] = gen_reg_rtx (V2DImode);
+ emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
+ op[i * 2 + 1]));
+ }
+
+ /* Pack vectors with 2 values into vectors with 4 values.
+ eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
+ vr2[1] = { xxxExxxF, xxxGxxxH };
+		     vr2[2] = { xxxIxxxJ, xxxKxxxL };
+ vr2[3] = { xxxMxxxN, xxxOxxxP }; */
+ rtx vr2[4];
+ for (i = 0; i < n_elts / 4; i++)
+ {
+ vr2[i] = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
+ vr1[i * 2 + 1]));
+ }
+
+ /* Pack vectors with 4 values into vectors with 8 values.
+ eg: vr3[0] = { xAxBxCxD, xExFxGxH };
+ vr3[1] = { xIxJxKxL, xMxNxOxP }; */
+ rtx vr3[2];
+ for (i = 0; i < n_elts / 8; i++)
+ {
+ vr3[i] = gen_reg_rtx (V8HImode);
+ emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
+ vr2[i * 2 + 1]));
+ }
+
+	      /* If the mode is V8HImode, we are done; set TARGET and return.  */
+ if (mode == V8HImode)
+ {
+ emit_insn (gen_rtx_SET (target, vr3[0]));
+ return;
+ }
+
+	      /* Pack vectors with 8 values into vectors with 16 values.  */
+ rtx res = gen_reg_rtx (V16QImode);
+ emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
+ emit_insn (gen_rtx_SET (target, res));
+ }
+ else
+ {
+ rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
+ rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
+ rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
+ rtx perm_idx;
+
+ /* Set up some common gen routines and values. */
+ if (BYTES_BIG_ENDIAN)
+ {
+ if (mode == V16QImode)
+ {
+ merge_v16qi = gen_altivec_vmrghb;
+ merge_v8hi = gen_altivec_vmrglh;
+ }
+ else
+ merge_v8hi = gen_altivec_vmrghh;
+
+ merge_v4si = gen_altivec_vmrglw;
+ perm_idx = GEN_INT (3);
+ }
+ else
+ {
+ if (mode == V16QImode)
+ {
+ merge_v16qi = gen_altivec_vmrglb;
+ merge_v8hi = gen_altivec_vmrghh;
+ }
+ else
+ merge_v8hi = gen_altivec_vmrglh;
+
+ merge_v4si = gen_altivec_vmrghw;
+ perm_idx = GEN_INT (0);
+ }
+
+ /* Move to VSX register with direct move.
+ eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
+ vr_qi[1] = { xxxxxxxB, xxxxxxxx };
+ ...
+ vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
+ rtx vr_qi[16];
+ for (i = 0; i < n_elts; i++)
+ {
+ vr_qi[i] = gen_reg_rtx (V16QImode);
+ if (TARGET_POWERPC64)
+ emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
+ else
+ emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
+ }
+
+ /* Merge/move to vector short.
+ eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
+ vr_hi[1] = { xxxxxxxx, xxxxxxCD };
+ ...
+ vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
+ rtx vr_hi[8];
+ for (i = 0; i < 8; i++)
+ {
+ rtx tmp = vr_qi[i];
+ if (mode == V16QImode)
+ {
+ tmp = gen_reg_rtx (V16QImode);
+ emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
+ }
+ vr_hi[i] = gen_reg_rtx (V8HImode);
+ emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
+ }
+
+ /* Merge vector short to vector int.
+ eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
+ vr_si[1] = { xxxxxxxx, xxxxEFGH };
+ ...
+ vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
+ rtx vr_si[4];
+ for (i = 0; i < 4; i++)
+ {
+ rtx tmp = gen_reg_rtx (V8HImode);
+ emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
+ vr_si[i] = gen_reg_rtx (V4SImode);
+ emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
+ }
+
+ /* Merge vector int to vector long.
+ eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
+ vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
+ rtx vr_di[2];
+ for (i = 0; i < 2; i++)
+ {
+ rtx tmp = gen_reg_rtx (V4SImode);
+ emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
+ vr_di[i] = gen_reg_rtx (V2DImode);
+ emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
+ }
+
+ rtx res = gen_reg_rtx (V2DImode);
+ emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
+ emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
+ }
+
return;
}
emit_move_insn (target, mem);
}
-/* Set field ELT of TARGET to VAL. */
+/* Set field ELT_RTX of TARGET to VAL. */
void
-rs6000_expand_vector_set (rtx target, rtx val, int elt)
+rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
{
machine_mode mode = GET_MODE (target);
machine_mode inner_mode = GET_MODE_INNER (mode);
if (VECTOR_MEM_VSX_P (mode))
{
rtx insn = NULL_RTX;
- rtx elt_rtx = GEN_INT (elt);
if (mode == V2DFmode)
insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
}
}
+ gcc_assert (CONST_INT_P (elt_rtx));
+
/* Simplify setting single element vectors like V1TImode. */
- if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
+ if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
+ && INTVAL (elt_rtx) == 0)
{
emit_move_insn (target, gen_lowpart (mode, val));
return;
/* Set permute mask to insert element into target. */
for (i = 0; i < width; ++i)
- XVECEXP (mask, 0, elt*width + i)
- = GEN_INT (i + 0x10);
+ XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
if (BYTES_BIG_ENDIAN)
return (STRICT_ALIGNMENT
|| (!TARGET_EFFICIENT_UNALIGNED_VSX
&& ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
- || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
+ || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
&& (int) align < VECTOR_ALIGN (mode)))));
}
{
rtx op0, op1;
- if (GET_MODE_SIZE (mode) != 16)
+ if (GET_MODE_SIZE (mode) < 16)
return false;
if (legitimate_indirect_address_p (addr, strict))
return mode_supports_dq_form (mode);
break;
+ /* The vector pair/quad types support offset addressing if the
+ underlying vectors support offset addressing. */
+ case E_OOmode:
+ case E_XOmode:
+ return TARGET_MMA;
+
case E_SDmode:
/* If we can do direct load/stores of SDmode, restrict it to reg+reg
addressing for the LFIWZX and STFIWX instructions. */
bool
avoiding_indexed_address_p (machine_mode mode)
{
- /* Avoid indexed addressing for modes that have non-indexed
- load/store instruction forms. */
+ unsigned int msize = GET_MODE_SIZE (mode);
+
+ /* Avoid indexed addressing for modes that have non-indexed load/store
+ instruction forms. On power10, vector pairs have an indexed
+ form, but vector quads don't. */
+ if (msize > 16)
+ return msize != 32;
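+  /* (That is, 32-byte vector pairs, which have an indexed form, are not
+     avoided, while 64-byte vector quads are.)  */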
+
return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}
low_int = 0;
high_int = INTVAL (XEXP (x, 1)) - low_int;
sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
- GEN_INT (high_int)), 0);
+ gen_int_mode (high_int, Pmode)), 0);
return plus_constant (Pmode, sum, low_int);
}
else if (GET_CODE (x) == PLUS
dest = gen_reg_rtx (Pmode);
if (model == TLS_MODEL_LOCAL_EXEC
- && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
+ && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
{
rtx tlsreg;
them in the .got section. So use a pointer to the .got section,
not one to secondary TOC sections used by 64-bit -mminimal-toc,
or to secondary GOT sections used by 32-bit -fPIC. */
- if (rs6000_pcrel_p (cfun))
+ if (rs6000_pcrel_p ())
got = const0_rtx;
else if (TARGET_64BIT)
got = gen_rtx_REG (Pmode, 2);
rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
- if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
+ if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
{
if (TARGET_64BIT)
insn = gen_tls_dtprel_64 (dest, tmp1, addr);
else
insn = gen_tls_got_tprel_32 (tmp2, got, addr);
emit_insn (insn);
- if (rs6000_pcrel_p (cfun))
+ if (rs6000_pcrel_p ())
{
if (TARGET_64BIT)
insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
bool reg_offset_p = reg_offset_addressing_ok_p (mode);
bool quad_offset_p = mode_supports_dq_form (mode);
- /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
+ if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
+ return 0;
+
+ /* Handle unaligned altivec lvx/stvx type addresses. */
if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
&& GET_CODE (x) == AND
&& CONST_INT_P (XEXP (x, 1))
&& INTVAL (XEXP (x, 1)) == -16)
- x = XEXP (x, 0);
+ {
+ x = XEXP (x, 0);
+ return (legitimate_indirect_address_p (x, reg_ok_strict)
+ || legitimate_indexed_address_p (x, reg_ok_strict)
+ || virtual_stack_registers_memory_p (x));
+ }
- if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
- return 0;
if (legitimate_indirect_address_p (x, reg_ok_strict))
return 1;
if (TARGET_UPDATE
{
case PROCESSOR_POWER8:
case PROCESSOR_POWER9:
- case PROCESSOR_FUTURE:
+ case PROCESSOR_POWER10:
if (DECIMAL_FLOAT_MODE_P (mode))
return 1;
if (VECTOR_MODE_P (mode))
operands[1] = force_const_mem (mode, operands[1]);
break;
+ case E_OOmode:
+ case E_XOmode:
+ if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
+	error ("%qs is an opaque type, and you cannot set it to other values",
+ (mode == OOmode) ? "__vector_pair" : "__vector_quad");
+ break;
+
case E_SImode:
case E_DImode:
/* Use default pattern for address of ELF small data */
return NO_REGS;
}
- if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
- return GENERAL_REGS;
+ /* For the vector pair and vector quad modes, prefer their natural register
+ (VSX or FPR) rather than GPR registers. For other integer types, prefer
+ the GPR registers. */
+ if (rclass == GEN_OR_FLOAT_REGS)
+ {
+ if (mode == OOmode)
+ return VSX_REGS;
+
+ if (mode == XOmode)
+ return FLOAT_REGS;
+
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ return GENERAL_REGS;
+ }
return rclass;
}
/* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
output_operand. */
+ case 'A':
+ /* Write the MMA accumulator number associated with VSX register X. */
+ if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
+ output_operand_lossage ("invalid %%A value");
+ else
+ fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
+ return;
+
case 'D':
/* Like 'J' but get to the GT bit only. */
if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
? "+32768" : ""));
static char str[32]; /* 1 spare */
- if (rs6000_pcrel_p (cfun))
+ if (rs6000_pcrel_p ())
sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
rel64);
}
- const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
+ const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
&& flag_pic == 2 ? "+32768" : "");
if (!speculate)
else if (!speculate)
s += sprintf (s, "crset 2\n\t");
- if (rs6000_pcrel_p (cfun))
+ if (rs6000_pcrel_p ())
{
if (speculate)
sprintf (s, "b%%T%ul", funop);
if (!TARGET_FLOAT128_CVT)
{
- if ((mode1 == KFmode && mode2 == IFmode)
- || (mode1 == IFmode && mode2 == KFmode))
- return N_("__float128 and __ibm128 cannot be used in the same "
- "expression");
-
- if (TARGET_IEEEQUAD
- && ((mode1 == IFmode && mode2 == TFmode)
- || (mode1 == TFmode && mode2 == IFmode)))
- return N_("__ibm128 and long double cannot be used in the same "
- "expression");
-
- if (!TARGET_IEEEQUAD
- && ((mode1 == KFmode && mode2 == TFmode)
- || (mode1 == TFmode && mode2 == KFmode)))
- return N_("__float128 and long double cannot be used in the same "
- "expression");
+ if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
+ || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
+ return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
+ "point types");
}
return NULL;
return 1;
}
-/* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
- for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
- comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
- hardware has no such operation. */
+/* Possibly emit the xsmaxcdp and xsmincdp instructions to generate a maximum
+   or minimum with "C" semantics.
-static int
-rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
+   Unless -ffast-math is in effect, these instructions cannot replace
+   comparisons that implicitly reverse the condition, because the comparison
+   might generate a NaN or signed zero.
+
+   I.e. the following can always be replaced:
+ ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
+ ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
+ ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
+ ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
+
+ The following can be replaced only if -ffast-math is used:
+ ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
+ ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
+ ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
+ ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
+
+ Move TRUE_COND to DEST if OP of the operands of the last comparison is
+ nonzero/true, FALSE_COND if it is zero/false.
+
+   Return false if we can't generate the appropriate minimum or maximum, and
+   true if we did generate the minimum or maximum. */
+
+static bool
+rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
enum rtx_code code = GET_CODE (op);
rtx op0 = XEXP (op, 0);
bool max_p = false;
if (result_mode != compare_mode)
- return 0;
+ return false;
if (code == GE || code == GT)
max_p = true;
else if (code == LE || code == LT)
max_p = false;
else
- return 0;
+ return false;
if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
;
max_p = !max_p;
else
- return 0;
+ return false;
rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
- return 1;
+ return true;
}
-/* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
- XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
- operands of the last comparison is nonzero/true, FALSE_COND if it is
- zero/false. Return 0 if the hardware has no such operation. */
+/* Possibly emit a floating point conditional move by generating a compare
+   that sets a mask, followed by an XXSEL select instruction.
-static int
-rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
+ Move TRUE_COND to DEST if OP of the operands of the last comparison is
+ nonzero/true, FALSE_COND if it is zero/false.
+
+ Return false if the operation cannot be generated, and true if we could
+ generate the instruction. */
+
+static bool
+rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
enum rtx_code code = GET_CODE (op);
rtx op0 = XEXP (op, 0);
break;
default:
- return 0;
+ return false;
}
/* Generate: [(parallel [(set (dest)
emit_insn (gen_rtx_PARALLEL (VOIDmode,
gen_rtvec (2, cmove_rtx, clobber_rtx)));
- return 1;
+ return true;
+}
+
+/* Helper function to return true if the target has a compare and set mask
+   instruction that can be used with XXSEL to implement a conditional move.
+   Such a target is also assumed to support the "C" minimum and maximum
+   instructions. */
+
+static bool
+have_compare_and_set_mask (machine_mode mode)
+{
+ switch (mode)
+ {
+ case E_SFmode:
+ case E_DFmode:
+ return TARGET_P9_MINMAX;
+
+ default:
+ break;
+ }
+
+ return false;
}
/* Emit a conditional move: move TRUE_COND to DEST if OP of the
operands of the last comparison is nonzero/true, FALSE_COND if it
is zero/false. Return 0 if the hardware has no such operation. */
-int
+bool
rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
enum rtx_code code = GET_CODE (op);
/* In the isel case however, we can use a compare immediate, so
op1 may be a small constant. */
&& (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
- return 0;
+ return false;
if (GET_MODE (true_cond) != result_mode)
- return 0;
+ return false;
if (GET_MODE (false_cond) != result_mode)
- return 0;
+ return false;
- /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
- if (TARGET_P9_MINMAX
- && (compare_mode == SFmode || compare_mode == DFmode)
- && (result_mode == SFmode || result_mode == DFmode))
+ /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
+ instructions. */
+ if (have_compare_and_set_mask (compare_mode)
+ && have_compare_and_set_mask (result_mode))
{
- if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
- return 1;
+ if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
+ return true;
- if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
- return 1;
+ if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
+ return true;
}
/* Don't allow using floating point comparisons for integer results for
now. */
if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
- return 0;
+ return false;
/* First, work out if the hardware can do this at all, or
if it's too slow.... */
{
if (TARGET_ISEL)
return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
- return 0;
+ return false;
}
is_against_zero = op1 == CONST0_RTX (compare_mode);
generated. */
if (SCALAR_FLOAT_MODE_P (compare_mode)
&& flag_trapping_math && ! is_against_zero)
- return 0;
+ return false;
/* Eliminate half of the comparisons by switching operands, this
makes the remaining code simpler. */
/* UNEQ and LTGT take four instructions for a comparison with zero,
it'll probably be faster to use a branch here too. */
if (code == UNEQ && HONOR_NANS (compare_mode))
- return 0;
+ return false;
/* We're going to try to implement comparisons by performing
a subtract, then comparing against zero. Unfortunately,
&& ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
|| (! rtx_equal_p (op0, true_cond)
&& ! rtx_equal_p (op1, true_cond))))
- return 0;
+ return false;
/* At this point we know we can use fsel. */
/* Don't allow compare_mode other than SFmode or DFmode, for others there
is no fsel instruction. */
if (compare_mode != SFmode && compare_mode != DFmode)
- return 0;
+ return false;
/* Reduce the comparison to a comparison against zero. */
if (! is_against_zero)
gen_rtx_GE (VOIDmode,
op0, op1),
true_cond, false_cond)));
- return 1;
+ return true;
}
/* Same as above, but for ints (isel). */
-int
+bool
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
rtx condition_rtx, cr;
bool signedp;
if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
- return 0;
+ return false;
/* We still have to do the compare, because isel doesn't do a
compare, it just looks at the CRx bits set by a previous compare
emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
- return 1;
+ return true;
}
void
reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
mode = GET_MODE (dst);
nregs = hard_regno_nregs (reg, mode);
- if (FP_REGNO_P (reg))
+
+ /* If we have a vector quad register for MMA, and this is a load or store,
+ see if we can use vector paired load/stores. */
+ if (mode == XOmode && TARGET_MMA
+ && (MEM_P (dst) || MEM_P (src)))
+ {
+ reg_mode = OOmode;
+ nregs /= 2;
+ }
+ /* If we have a vector pair/quad mode, split it into two/four separate
+ vectors. */
+ else if (mode == OOmode || mode == XOmode)
+ reg_mode = V1TImode;
+ else if (FP_REGNO_P (reg))
reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
(TARGET_HARD_FLOAT ? DFmode : SFmode);
else if (ALTIVEC_REGNO_P (reg))
return;
}
+ /* The __vector_pair and __vector_quad modes are multi-register
+ modes, so if we have to load or store the registers, we have to be
+ careful to properly swap them if we're in little endian mode
+ below. This means the last register gets the first memory
+ location. We also need to be careful of using the right register
+ numbers if we are splitting XO to OO. */
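+  /* E.g. on little endian, an OOmode value in VSRs {vsN, vsN+1} stores
+     vsN+1 to the first 16 bytes of memory and vsN to the next 16.  */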
+ if (mode == OOmode || mode == XOmode)
+ {
+ nregs = hard_regno_nregs (reg, mode);
+ int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
+ if (MEM_P (dst))
+ {
+ unsigned offset = 0;
+ unsigned size = GET_MODE_SIZE (reg_mode);
+
+ /* If we are reading an accumulator register, we have to
+ deprime it before we can access it. */
+ if (TARGET_MMA
+ && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
+ emit_insn (gen_mma_xxmfacc (src, src));
+
+ for (int i = 0; i < nregs; i += reg_mode_nregs)
+ {
+	      unsigned subreg
+		= (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+ rtx dst2 = adjust_address (dst, reg_mode, offset);
+ rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
+ offset += size;
+ emit_insn (gen_rtx_SET (dst2, src2));
+ }
+
+ return;
+ }
+
+ if (MEM_P (src))
+ {
+ unsigned offset = 0;
+ unsigned size = GET_MODE_SIZE (reg_mode);
+
+ for (int i = 0; i < nregs; i += reg_mode_nregs)
+ {
+	      unsigned subreg
+		= (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+ rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
+ rtx src2 = adjust_address (src, reg_mode, offset);
+ offset += size;
+ emit_insn (gen_rtx_SET (dst2, src2));
+ }
+
+ /* If we are writing an accumulator register, we have to
+ prime it after we've written it. */
+ if (TARGET_MMA
+ && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
+ emit_insn (gen_mma_xxmtacc (dst, dst));
+
+ return;
+ }
+
+ if (GET_CODE (src) == UNSPEC)
+ {
+ gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
+ gcc_assert (REG_P (dst));
+ if (GET_MODE (src) == XOmode)
+ gcc_assert (FP_REGNO_P (REGNO (dst)));
+ if (GET_MODE (src) == OOmode)
+ gcc_assert (VSX_REGNO_P (REGNO (dst)));
+
+ reg_mode = GET_MODE (XVECEXP (src, 0, 0));
+ for (int i = 0; i < XVECLEN (src, 0); i++)
+ {
+ rtx dst_i = gen_rtx_REG (reg_mode, reg + i);
+ emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
+ }
+
+ /* We are writing an accumulator register, so we have to
+ prime it after we've written it. */
+ if (GET_MODE (src) == XOmode)
+ emit_insn (gen_mma_xxmtacc (dst, dst));
+
+ return;
+ }
+
+ /* Register -> register moves can use common code. */
+ }
+
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
{
+ /* If we are reading an accumulator register, we have to
+ deprime it before we can access it. */
+ if (TARGET_MMA
+ && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
+ emit_insn (gen_mma_xxmfacc (src, src));
+
/* Move register range backwards, if we might have destructive
overlap. */
int i;
- for (i = nregs - 1; i >= 0; i--)
- emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
- i * reg_mode_size),
- simplify_gen_subreg (reg_mode, src, mode,
- i * reg_mode_size)));
+ /* XO/OO are opaque so cannot use subregs. */
+      if (mode == OOmode || mode == XOmode)
+ {
+ for (i = nregs - 1; i >= 0; i--)
+ {
+ rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+ rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+ emit_insn (gen_rtx_SET (dst_i, src_i));
+ }
+ }
+ else
+ {
+ for (i = nregs - 1; i >= 0; i--)
+ emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+ i * reg_mode_size),
+ simplify_gen_subreg (reg_mode, src, mode,
+ i * reg_mode_size)));
+ }
+
+ /* If we are writing an accumulator register, we have to
+ prime it after we've written it. */
+ if (TARGET_MMA
+ && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
+ emit_insn (gen_mma_xxmtacc (dst, dst));
}
else
{
gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
}
+ /* If we are reading an accumulator register, we have to
+ deprime it before we can access it. */
+ if (TARGET_MMA && REG_P (src)
+ && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
+ emit_insn (gen_mma_xxmfacc (src, src));
+
for (i = 0; i < nregs; i++)
{
/* Calculate index to next subword. */
if (j == 0 && used_update)
continue;
- emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
- j * reg_mode_size),
- simplify_gen_subreg (reg_mode, src, mode,
- j * reg_mode_size)));
+ /* XO/OO are opaque so cannot use subregs. */
+	  if (mode == OOmode || mode == XOmode)
+ {
+ rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+ rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+ emit_insn (gen_rtx_SET (dst_i, src_i));
+ }
+ else
+ emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+ j * reg_mode_size),
+ simplify_gen_subreg (reg_mode, src, mode,
+ j * reg_mode_size)));
}
+
+ /* If we are writing an accumulator register, we have to
+ prime it after we've written it. */
+ if (TARGET_MMA && REG_P (dst)
+ && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
+ emit_insn (gen_mma_xxmtacc (dst, dst));
+
if (restore_basereg != NULL_RTX)
emit_insn (restore_basereg);
}
/* Separate a load from a narrower, dependent store. */
if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
- || rs6000_tune == PROCESSOR_FUTURE)
+ || rs6000_tune == PROCESSOR_POWER10)
&& GET_CODE (PATTERN (insn)) == SET
&& GET_CODE (PATTERN (dep_insn)) == SET
&& MEM_P (XEXP (PATTERN (insn), 1))
|| rs6000_tune == PROCESSOR_POWER7
|| rs6000_tune == PROCESSOR_POWER8
|| rs6000_tune == PROCESSOR_POWER9
- || rs6000_tune == PROCESSOR_FUTURE
+ || rs6000_tune == PROCESSOR_POWER10
|| rs6000_tune == PROCESSOR_CELL)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
}
break;
- case TYPE_FPLOAD:
- if ((rs6000_tune == PROCESSOR_POWER6)
- && get_attr_update (insn) == UPDATE_NO
- && recog_memoized (dep_insn)
- && (INSN_CODE (dep_insn) >= 0)
- && (get_attr_type (dep_insn) == TYPE_MFFGPR))
- return 2;
-
default:
break;
}
|| get_attr_type (dep_insn) == TYPE_FPSIMPLE)
return 1;
break;
- case TYPE_FPLOAD:
- if (get_attr_update (insn) == UPDATE_NO
- && get_attr_type (dep_insn) == TYPE_MFFGPR)
- return 2;
- break;
default:
break;
}
case PROCESSOR_POWER8:
return 7;
case PROCESSOR_POWER9:
- case PROCESSOR_FUTURE:
+ case PROCESSOR_POWER10:
return 6;
default:
return 1;
/* AltiVec defines five built-in scalar types that serve as vector
elements; we must teach the compiler how to mangle them. The 128-bit
- floating point mangling is target-specific as well. */
+ floating point mangling is target-specific as well. MMA defines
+ two built-in types to be used as opaque vector types. */
static const char *
rs6000_mangle_type (const_tree type)
type = TYPE_MAIN_VARIANT (type);
if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
- && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
+ && TREE_CODE (type) != OPAQUE_TYPE)
return NULL;
if (type == bool_char_type_node) return "U6__boolc";
if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
+ if (type == vector_pair_type_node)
+ return "u13__vector_pair";
+ if (type == vector_quad_type_node)
+ return "u13__vector_quad";
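+
+  /* The "u" prefix is the Itanium ABI vendor-extended type encoding, and 13
+     is the length of the names "__vector_pair" and "__vector_quad".  */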
+
/* For all other types, use the default mangling. */
return NULL;
}
{
rtx base = const0_rtx;
int regno = 12;
- if (rs6000_pcrel_p (cfun))
+ if (rs6000_pcrel_p ())
{
rtx reg = gen_rtx_REG (Pmode, regno);
rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
{
/* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
named section. */
- if (align > BIGGEST_ALIGNMENT)
+ if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
{
resolve_unique_section (decl, reloc, true);
if (IN_NAMED_SECTION (decl))
return true;
}
else if ((outer_code == PLUS
- && reg_or_add_cint_operand (x, VOIDmode))
+ && reg_or_add_cint_operand (x, mode))
|| (outer_code == MINUS
- && reg_or_sub_cint_operand (x, VOIDmode))
+ && reg_or_sub_cint_operand (x, mode))
|| ((outer_code == SET
|| outer_code == IOR
|| outer_code == XOR)
}
break;
+ case UNSPEC:
+ if (XINT (x, 1) == UNSPEC_MMA_XXSETACCZ)
+ {
+ *total = 0;
+ return true;
+ }
+ break;
+
default:
break;
}
allocation a move within the same class might turn
out to be a nop. */
if (rs6000_tune == PROCESSOR_POWER9
- || rs6000_tune == PROCESSOR_FUTURE)
+ || rs6000_tune == PROCESSOR_POWER10)
ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
else
ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
/* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
return register is used in both cases, and we won't see V2DImode/V2DFmode
for pure altivec, combine the two cases. */
- else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
+ else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
&& TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
&& ALTIVEC_OR_VSX_VECTOR_MODE (mode))
regno = ALTIVEC_ARG_RETURN;
static struct rs6000_opt_mask const rs6000_opt_masks[] =
{
{ "altivec", OPTION_MASK_ALTIVEC, false, true },
+ { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
+ false, true },
+ { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
+ false, true },
{ "cmpb", OPTION_MASK_CMPB, false, true },
{ "crypto", OPTION_MASK_CRYPTO, false, true },
{ "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
{ "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
{ "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
{ "fprnd", OPTION_MASK_FPRND, false, true },
- { "future", OPTION_MASK_FUTURE, false, true },
+ { "power10", OPTION_MASK_POWER10, false, true },
{ "hard-dfp", OPTION_MASK_DFP, false, true },
{ "htm", OPTION_MASK_HTM, false, true },
{ "isel", OPTION_MASK_ISEL, false, true },
{ "mfcrf", OPTION_MASK_MFCRF, false, true },
{ "mfpgpr", 0, false, true },
+ { "mma", OPTION_MASK_MMA, false, true },
{ "modulo", OPTION_MASK_MODULO, false, true },
{ "mulhw", OPTION_MASK_MULHW, false, true },
{ "multiple", OPTION_MASK_MULTIPLE, false, true },
{ "powerpc64", RS6000_BTM_POWERPC64, false, false },
{ "float128", RS6000_BTM_FLOAT128, false, false },
{ "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
+ { "mma", RS6000_BTM_MMA, false, false },
+ { "power10", RS6000_BTM_P10, false, false },
};
/* Option variables that we want to support inside attribute((target)) and
&& strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
return true;
- old_optimize = build_optimization_node (&global_options);
+ old_optimize = build_optimization_node (&global_options,
+ &global_options_set);
func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
/* If the function changed the optimization levels as well as setting target
options, start with the optimizations specified. */
if (func_optimize && func_optimize != old_optimize)
- cl_optimization_restore (&global_options,
+ cl_optimization_restore (&global_options, &global_options_set,
TREE_OPTIMIZATION (func_optimize));
/* The target attributes may also change some optimization flags, so update
the optimization options if necessary. */
- cl_target_option_save (&cur_target, &global_options);
+ cl_target_option_save (&cur_target, &global_options, &global_options_set);
rs6000_cpu_index = rs6000_tune_index = -1;
ret = rs6000_inner_target_options (args, true);
if (ret)
{
ret = rs6000_option_override_internal (false);
- new_target = build_target_option_node (&global_options);
+ new_target = build_target_option_node (&global_options,
+ &global_options_set);
}
else
new_target = NULL;
- new_optimize = build_optimization_node (&global_options);
+ new_optimize = build_optimization_node (&global_options,
+ &global_options_set);
if (!new_target)
ret = false;
DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
}
- cl_target_option_restore (&global_options, &cur_target);
+ cl_target_option_restore (&global_options, &global_options_set, &cur_target);
if (old_optimize != new_optimize)
- cl_optimization_restore (&global_options,
+ cl_optimization_restore (&global_options, &global_options_set,
TREE_OPTIMIZATION (old_optimize));
return ret;
bool
rs6000_pragma_target_parse (tree args, tree pop_target)
{
- tree prev_tree = build_target_option_node (&global_options);
+ tree prev_tree = build_target_option_node (&global_options,
+ &global_options_set);
tree cur_tree;
struct cl_target_option *prev_opt, *cur_opt;
HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
cur_tree = ((pop_target)
? pop_target
: target_option_default_node);
- cl_target_option_restore (&global_options,
+ cl_target_option_restore (&global_options, &global_options_set,
TREE_TARGET_OPTION (cur_tree));
}
else
rs6000_cpu_index = rs6000_tune_index = -1;
if (!rs6000_inner_target_options (args, false)
|| !rs6000_option_override_internal (false)
- || (cur_tree = build_target_option_node (&global_options))
+ || (cur_tree = build_target_option_node (&global_options,
+ &global_options_set))
== NULL_TREE)
{
if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
void
rs6000_activate_target_options (tree new_tree)
{
- cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
+ cl_target_option_restore (&global_options, &global_options_set,
+ TREE_TARGET_OPTION (new_tree));
if (TREE_TARGET_GLOBALS (new_tree))
restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
else if (new_tree == target_option_default_node)
static void
rs6000_function_specific_save (struct cl_target_option *ptr,
- struct gcc_options *opts)
+ struct gcc_options *opts,
+ struct gcc_options */* opts_set */)
{
ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
static void
rs6000_function_specific_restore (struct gcc_options *opts,
+ struct gcc_options */* opts_set */,
struct cl_target_option *ptr)
{
const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
const char *const name; /* name of the switch. */
} flags[] = {
- { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
+ { OPTION_MASK_POWER10, OTHER_POWER10_MASKS, "power10" },
{ OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
{ OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
{ OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
if (!SYMBOL_REF_P (func)
|| (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
{
- if (!rs6000_pcrel_p (cfun))
+ if (!rs6000_pcrel_p ())
{
/* Save the TOC into its reserved slot before the call,
and prepare to restore it after the call. */
/* A function pointer in the ELFv2 ABI is just a plain address, but
the ABI requires it to be loaded into r12 before the call. */
func_addr = gen_rtx_REG (Pmode, 12);
- if (!rtx_equal_p (func_addr, func))
- emit_move_insn (func_addr, func);
+ emit_move_insn (func_addr, func);
abi_reg = func_addr;
/* Indirect calls via CTR are strongly preferred over indirect
calls via LR, so move the address there. Needed to mark
else
{
/* No TOC register needed for calls from PC-relative callers. */
- if (!rs6000_pcrel_p (cfun))
+ if (!rs6000_pcrel_p ())
/* Direct calls use the TOC: for local calls, the callee will
assume the TOC register is set; for non-local calls, the
PLT stub needs the TOC register. */
{
rtx call[2];
rtx insn;
+ rtx r12 = NULL_RTX;
+ rtx func_addr = func_desc;
gcc_assert (INTVAL (cookie) == 0);
if (global_tlsarg)
tlsarg = global_tlsarg;
+ /* For ELFv2, r12 and CTR need to hold the function address
+ for an indirect call. */
+ if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
+ {
+ r12 = gen_rtx_REG (Pmode, 12);
+ emit_move_insn (r12, func_desc);
+ func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
+ emit_move_insn (func_addr, r12);
+ }
+
/* Create the call. */
- call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
+ call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
if (value != NULL_RTX)
call[0] = gen_rtx_SET (value, call[0]);
insn = emit_call_insn (insn);
/* Note use of the TOC register. */
- if (!rs6000_pcrel_p (cfun))
+ if (!rs6000_pcrel_p ())
use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
gen_rtx_REG (Pmode, TOC_REGNUM));
+
+ /* Note use of r12. */
+ if (r12)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
}
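
As a hedged illustration of the call shape this handles (not from the patch): on ELFv2 an indirect sibcall must carry the target address in both r12 and CTR, because the callee's global entry point computes its TOC pointer from r12.

/* Illustrative only: this tail call goes through the new r12/CTR path.  */
extern int (*callback) (int);

int
tail_call (int x)
{
  return callback (x);   /* Expected to expand to mtctr r12; bctr.  */
}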
/* Expand code to perform a call under the SYSV4 ABI. */
/* Return whether we should generate PC-relative code for *FN. */
bool
-rs6000_pcrel_p (struct function *fn)
+rs6000_function_pcrel_p (struct function *fn)
{
if (DEFAULT_ABI != ABI_ELFv2)
return false;
return rs6000_fndecl_pcrel_p (fn->decl);
}
+/* Return whether we should generate PC-relative code for the current
+ function. */
+bool
+rs6000_pcrel_p ()
+{
+ return (DEFAULT_ABI == ABI_ELFv2
+ && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
+ && TARGET_CMODEL == CMODEL_MEDIUM);
+}
+
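For illustration (assumed flags, not part of the patch): when rs6000_pcrel_p () is true, e.g. under -mcpu=power10 with the default -mcmodel=medium on ELFv2, data can be addressed relative to the PC with prefixed instructions instead of through the TOC:

/* Illustrative only.  */
static int counter;

int
peek (void)
{
  return counter;   /* Can be a single plwz r3,counter@pcrel.  */
}
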
\f
/* Given an address (ADDR), a mode (MODE), and what the format of the
non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
non_prefixed_format = NON_PREFIXED_DS;
else if (TARGET_VSX && size >= 16
- && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
+ && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
non_prefixed_format = NON_PREFIXED_DQ;
else
return INSN_FORM_BAD;
}
-/* Helper function to see if we're potentially looking at stfs.
+/* Helper function to see if we're potentially looking at lfs/stfs.
- PARALLEL containing a SET and a CLOBBER
- - SET is from UNSPEC_SI_FROM_SF to MEM:SI
- - CLOBBER is a V4SF
+ - stfs:
+ - SET is from UNSPEC_SI_FROM_SF to MEM:SI
+ - CLOBBER is a V4SF
+ - lfs:
+ - SET is from UNSPEC_SF_FROM_SI to REG:SF
+ - CLOBBER is a DI
*/
static bool
-is_stfs_insn (rtx_insn *insn)
+is_lfs_stfs_insn (rtx_insn *insn)
{
rtx pattern = PATTERN (insn);
if (GET_CODE (pattern) != PARALLEL)
rtx src = SET_SRC (set);
rtx scratch = SET_DEST (clobber);
- if (GET_CODE (src) != UNSPEC || XINT (src, 1) != UNSPEC_SI_FROM_SF)
+ if (GET_CODE (src) != UNSPEC)
return false;
- if (GET_CODE (dest) != MEM || GET_MODE (dest) != SImode)
- return false;
+ /* stfs case. */
+ if (XINT (src, 1) == UNSPEC_SI_FROM_SF
+ && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
+ && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
+ return true;
- if (GET_CODE (scratch) != SCRATCH || GET_MODE (scratch) != V4SFmode)
- return false;
+ /* lfs case. */
+ if (XINT (src, 1) == UNSPEC_SF_FROM_SI
+ && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
+ && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
+ return true;
- return true;
+ return false;
}
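
To make the two shapes concrete, a sketch of the RTL being matched (reconstructed from the comment above, not verbatim from the machine description):

/* stfs:
     (parallel [(set (mem:SI ...)
                     (unspec:SI [(reg:SF ...)] UNSPEC_SI_FROM_SF))
                (clobber (scratch:V4SF))])
   lfs:
     (parallel [(set (reg:SF ...)
                     (unspec:SF [(mem:SI ...)] UNSPEC_SF_FROM_SI))
                (clobber (scratch:DI))])  */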
/* Helper function to take a REG and a MODE and turn it into the non-prefixed
else if (TARGET_VSX && size >= 16
&& (VECTOR_MODE_P (mode)
- || FLOAT128_VECTOR_P (mode)
+ || VECTOR_ALIGNMENT_P (mode)
|| mode == TImode || mode == CTImode))
return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
else if (TARGET_VSX && size >= 16
&& (VECTOR_MODE_P (mode)
- || FLOAT128_VECTOR_P (mode)
+ || VECTOR_ALIGNMENT_P (mode)
|| mode == TImode || mode == CTImode))
return NON_PREFIXED_DQ;
else
non_prefixed = reg_to_non_prefixed (reg, mem_mode);
- return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
+ if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
+ return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
+ else
+ return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
}
/* Whether a store instruction is a prefixed instruction. This is called from
/* Make sure we aren't looking at a stfs that doesn't match the other
   patterns reg_to_non_prefixed/address_is_prefixed look for.  */
- if (non_prefixed == NON_PREFIXED_X && is_stfs_insn (insn))
+ if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
else
return address_is_prefixed (addr, mem_mode, non_prefixed);
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
- && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl) )
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && DECL_IS_UNDECLARED_BUILTIN (decl))
{
size_t len = IDENTIFIER_LENGTH (id);
const char *name = IDENTIFIER_POINTER (id);
return false;
}
+/* Implement TARGET_INVALID_CONVERSION. */
+
+static const char *
+rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
+{
+ /* Make sure we're working with the canonical types. */
+ if (TYPE_CANONICAL (fromtype) != NULL_TREE)
+ fromtype = TYPE_CANONICAL (fromtype);
+ if (TYPE_CANONICAL (totype) != NULL_TREE)
+ totype = TYPE_CANONICAL (totype);
+
+ machine_mode frommode = TYPE_MODE (fromtype);
+ machine_mode tomode = TYPE_MODE (totype);
+
+ if (frommode != tomode)
+ {
+ /* Do not allow conversions to/from XOmode and OOmode types. */
+ if (frommode == XOmode)
+ return N_("invalid conversion from type %<__vector_quad%>");
+ if (tomode == XOmode)
+ return N_("invalid conversion to type %<__vector_quad%>");
+ if (frommode == OOmode)
+ return N_("invalid conversion from type %<__vector_pair%>");
+ if (tomode == OOmode)
+ return N_("invalid conversion to type %<__vector_pair%>");
+ }
+ else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
+ {
+ /* We really care about the modes of the base types. */
+ frommode = TYPE_MODE (TREE_TYPE (fromtype));
+ tomode = TYPE_MODE (TREE_TYPE (totype));
+
+ /* Do not allow conversions to/from XOmode and OOmode pointer
+ types, except to/from void pointers. */
+ if (frommode != tomode
+ && frommode != VOIDmode
+ && tomode != VOIDmode)
+ {
+ if (frommode == XOmode)
+ return N_("invalid conversion from type %<* __vector_quad%>");
+ if (tomode == XOmode)
+ return N_("invalid conversion to type %<* __vector_quad%>");
+ if (frommode == OOmode)
+ return N_("invalid conversion from type %<* __vector_pair%>");
+ if (tomode == OOmode)
+ return N_("invalid conversion to type %<* __vector_pair%>");
+ }
+ }
+
+ /* Conversion allowed. */
+ return NULL;
+}
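
A user-level sketch of what the hook rejects and allows (illustrative variable names; assumes an MMA-enabled target):

__vector_quad vq;                             /* XOmode.  */
__vector_pair vp;                             /* OOmode.  */

void
conversions (void)
{
  __vector_pair *pp = (__vector_pair *) &vq;  /* Rejected by the hook.  */
  void *pv = &vq;                             /* OK: void * is exempt.  */
  __vector_quad *pq = (__vector_quad *) pv;   /* OK: from void *.  */
}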
+
+/* Convert the SFmode constant OPERAND to its 32-bit IEEE bit pattern.  */
+
+long long
+rs6000_const_f32_to_i32 (rtx operand)
+{
+ long long value;
+ const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
+
+ gcc_assert (GET_MODE (operand) == SFmode);
+ REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
+ return value;
+}
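
A hedged sketch of how this helper might be used from an expander (the surrounding context and the destination TARGET_REG are assumed, not shown in this patch):

/* Sketch: get the bit pattern of 1.0f and splat it to a V2DF register.  */
rtx one = const_double_from_real_value (dconst1, SFmode);
long long bits = rs6000_const_f32_to_i32 (one);   /* 0x3f800000.  */
rs6000_emit_xxspltidp_v2df (target_reg, bits);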
+
+/* Emit the xxspltidp instruction, splatting the 32-bit single-precision
+   pattern VALUE (converted to double precision) into both elements of
+   the V2DF register DST.  */
+
+void
+rs6000_emit_xxspltidp_v2df (rtx dst, long value)
+{
+  /* A zero exponent field with a non-zero mantissa is a subnormal
+     single-precision value, for which xxspltidp's result is undefined.  */
+  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
+    inform (input_location,
+	    "the result for the xxspltidp instruction "
+	    "is undefined for subnormal input values");
+  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
+}
+
+/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
+
+static bool
+rs6000_gen_pic_addr_diff_vec (void)
+{
+ return rs6000_relative_jumptables;
+}
+
+/* Output one element of an ADDR_VEC jump table: a reference to the
+   internal label numbered VALUE, as a 64-bit word (DOUBLE_INT_ASM_OP)
+   on 64-bit targets or a 32-bit .long otherwise.  */
+
+void
+rs6000_output_addr_vec_elt (FILE *file, int value)
+{
+ const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
+ char buf[100];
+
+ fprintf (file, "%s", directive);
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
+ assemble_name (file, buf);
+ fprintf (file, "\n");
+}
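
For illustration, on an ELF target whose internal label prefix is .L, a call with VALUE 42 would emit roughly the following (the exact 64-bit directive spelling depends on DOUBLE_INT_ASM_OP):

	.quad	.L42    # 64-bit
	.long	.L42    # 32-bit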
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rs6000.h"