bool win;
int j;
+ /* For pre-AVX, disallow unaligned loads/stores where the
+ instructions don't support them. */
+ if (!TARGET_AVX
+ && VECTOR_MODE_P (GET_MODE (op))
+ && misaligned_operand (op, GET_MODE (op)))
+ {
+ int min_align = get_attr_ssememalign (insn);
+ if (min_align == 0)
+ return false;
+ }
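+ /* A zero ssememalign (the attribute default) means the memory
+ operand needs full GET_MODE_ALIGNMENT; insns that set a nonzero
+ minimum can cope with less than mode alignment and are left
+ alone here. */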
+
/* A unary operator may be accepted by the predicate, but it
is irrelevant for matching constraints. */
if (UNARY_P (op))
static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
- tree exp, rtx target)
+ tree exp, rtx target)
{
tree arg;
rtx pat, op;
unsigned int i, nargs, arg_adjust, memory;
+ bool aligned_mem = false;
struct
{
rtx op;
nargs = 1;
klass = load;
memory = 0;
+ switch (icode)
+ {
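+ /* MOVNTDQA is a non-temporal load that, like the non-temporal
+ stores below, faults on a misaligned memory operand. */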
+ case CODE_FOR_sse4_1_movntdqa:
+ case CODE_FOR_avx2_movntdqa:
+ aligned_mem = true;
+ break;
+ default:
+ break;
+ }
break;
case VOID_FTYPE_PV2SF_V4SF:
case VOID_FTYPE_PV4DI_V4DI:
klass = store;
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
+ switch (icode)
+ {
+ /* These builtins and instructions require the memory
+ to be properly aligned. */
+ case CODE_FOR_avx_movntv4di:
+ case CODE_FOR_sse2_movntv2di:
+ case CODE_FOR_avx_movntv8sf:
+ case CODE_FOR_sse_movntv4sf:
+ case CODE_FOR_sse4a_vmmovntv4sf:
+ case CODE_FOR_avx_movntv4df:
+ case CODE_FOR_sse2_movntv2df:
+ case CODE_FOR_sse4a_vmmovntv2df:
+ case CODE_FOR_sse2_movntidi:
+ case CODE_FOR_sse_movntq:
+ case CODE_FOR_sse2_movntisi:
+ aligned_mem = true;
+ break;
+ default:
+ break;
+ }
break;
case V4SF_FTYPE_V4SF_PCV2SF:
case V2DF_FTYPE_V2DF_PCDOUBLE:
{
op = ix86_zero_extend_to_Pmode (op);
target = gen_rtx_MEM (tmode, op);
+ /* target at this point has just BITS_PER_UNIT MEM_ALIGN
+ on it. Try to improve it using get_pointer_alignment,
+ and if the special builtin is one that requires strict
+ mode alignment, also from its GET_MODE_ALIGNMENT.
+ Failure to do so could lead to ix86_legitimate_combined_insn
+ rejecting all changes to such insns. */
+ unsigned int align = get_pointer_alignment (arg);
+ if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
+ align = GET_MODE_ALIGNMENT (tmode);
+ if (MEM_ALIGN (target) < align)
+ set_mem_align (target, align);
}
else
target = force_reg (tmode, op);
/* This must be the memory operand. */
op = ix86_zero_extend_to_Pmode (op);
op = gen_rtx_MEM (mode, op);
- gcc_assert (GET_MODE (op) == mode
- || GET_MODE (op) == VOIDmode);
+ /* op at this point has just BITS_PER_UNIT MEM_ALIGN
+ on it. Try to improve it using get_pointer_alignment,
+ and if the special builtin is one that requires strict
+ mode alignment, also from its GET_MODE_ALIGNMENT.
+ Failure to do so could lead to ix86_legitimate_combined_insn
+ rejecting all changes to such insns. */
+ unsigned int align = get_pointer_alignment (arg);
+ if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
+ align = GET_MODE_ALIGNMENT (mode);
+ if (MEM_ALIGN (op) < align)
+ set_mem_align (op, align);
}
else
{
}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
+ (set_attr "ssememalign" "8")
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
+ (set_attr "ssememalign" "8")
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
(cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
+ (set_attr "ssememalign" "8")
(set (attr "prefix_data16")
(if_then_else
(match_test "TARGET_AVX")
}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
+ (set_attr "ssememalign" "8")
(set (attr "prefix_data16")
(if_then_else
(match_test "TARGET_AVX")
"%vlddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
+ (set_attr "ssememalign" "8")
(set (attr "prefix_data16")
(if_then_else
(match_test "TARGET_AVX")
vrcpss\t{%1, %2, %0|%0, %2, %k1}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
+ (set_attr "ssememalign" "32")
(set_attr "atom_sse_attr" "rcp")
(set_attr "btver2_sse_attr" "rcp")
(set_attr "prefix" "orig,vex")
vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
+ (set_attr "ssememalign" "32")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
"%vcvtdq2pd\t{%1, %0|%0, %q1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
+ (set_attr "ssememalign" "64")
(set_attr "mode" "V2DF")])
(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name>"
%vmovhps\t{%2, %0|%q0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
%vmovlps\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
%vmovhlps\t{%1, %d0|%d0, %1}
%vmovlps\t{%H1, %d0|%d0, %H1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2SF,V4SF,V2SF")])
%vmovlps\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
(set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
%vmovlps\t{%2, %0|%q0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "length_immediate" "1,1,*,*,*")
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
%vmovhpd\t{%1, %0|%q0, %1}"
[(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
- (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
+ (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_data16" "*,*,*,1,*,1")
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
(set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
%vmovlpd\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_data16" "*,*,*,1,*,1")
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
(set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
movhlps\t{%1, %0|%0, %1}
movlps\t{%H1, %0|%0, %H1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "mode" "V2SF,V4SF,V2SF")])
;; Avoid combining registers from different units in a single alternative,
#"
[(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
(set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_data16" "1,*,*,*,*,*,*")
(set_attr "prefix" "orig,vex,orig,vex,*,*,*")
(set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
(const_string "imov")
]
(const_string "ssemov")))
+ (set_attr "ssememalign" "64")
(set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
(set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
(set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
(const_string "1")
(const_string "*")))
(set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
+ (set_attr "ssememalign" "64")
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
(set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
"TARGET_SSE4_1"
"%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
"TARGET_SSE4_1"
"%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "32")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
"TARGET_SSE4_1"
"%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
"TARGET_SSE4_1"
"%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "16")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
"TARGET_SSE4_1"
"%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "32")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
"TARGET_SSE4_1"
"%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load")
(set_attr "mode" "TI")])
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "load")
(set_attr "mode" "TI")])
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "btver2_decode" "vector")
(set_attr "memory" "none,load")
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "btver2_decode" "vector")
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load,none,load")
(set_attr "btver2_decode" "vector,vector,vector,vector")
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load")
(set_attr "mode" "TI")])
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "load")
(set_attr "mode" "TI")])
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "memory" "none,load")
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "memory" "none,load")
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load,none,load")
(set_attr "prefix" "maybe_vex")