return new pass_remove_partial_avx_dependency (ctxt);
}
+/* Convert legacy instructions that clobber EFLAGS to APX_NF
+   instructions when there is no flag set between a flag
+   producer and user.
+
+   The function works in two phases:
+
+   1. Every basic block is scanned on its own.  A conversion sequence
+      starts at the first single-set insn whose source is a comparison
+      on FLAGS_REG and whose destination does not mention FLAGS_REG.
+      While a sequence is active, each PARALLEL insn whose "has_nf"
+      attribute is set is recorded as a candidate.  The sequence ends
+      at the next insn that sets FLAGS_REG other than by a CLOBBER.
+      If an insn without "has_nf" clobbers FLAGS_REG, the candidates
+      recorded since the sequence started are dropped again.
+
+   2. For every remaining candidate the CLOBBER of FLAGS_REG is removed
+      from its pattern and the insn is re-recognized.  If nothing
+      matches the stripped pattern the insn is left as it was.
+
+   Always returns 0.  */
+
+static unsigned int
+ix86_apx_nf_convert (void)
+{
+  timevar_push (TV_MACH_DEP);
+
+  basic_block bb;
+  rtx_insn *insn;
+  /* Insns selected for conversion, mapped to the pattern they had
+     when they were selected.  */
+  hash_map <rtx_insn *, rtx> converting_map;
+  /* Candidates recorded since the current sequence started, so that
+     they can be revoked as a group.  */
+  auto_vec <rtx_insn *> current_convert_list;
+
+  bool converting_seq = false;
+  rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      /* Reset conversion for each bb.  */
+      converting_seq = false;
+      FOR_BB_INSNS (bb, insn)
+        {
+          if (!NONDEBUG_INSN_P (insn))
+            continue;
+
+          if (recog_memoized (insn) < 0)
+            continue;
+
+          /* Convert candidate insns after cstore, which should
+             satisfy the two conditions:
+             1. Is not flag user or producer, only clobbers
+                FLAGS_REG.
+             2. Have corresponding nf pattern.  */
+
+          rtx pat = PATTERN (insn);
+
+          /* Start conversion at the first cstorecc.  */
+          rtx set = NULL_RTX;
+          if (!converting_seq
+              && (set = single_set (insn))
+              && ix86_comparison_operator (SET_SRC (set), VOIDmode)
+              && reg_overlap_mentioned_p (cc, SET_SRC (set))
+              && !reg_overlap_mentioned_p (cc, SET_DEST (set)))
+            {
+              converting_seq = true;
+              current_convert_list.truncate (0);
+            }
+          /* Terminate at the next explicit flag set.  */
+          else if (reg_set_p (cc, pat)
+                   && GET_CODE (set_of (cc, pat)) != CLOBBER)
+            converting_seq = false;
+
+          if (!converting_seq)
+            continue;
+
+          if (get_attr_has_nf (insn)
+              && GET_CODE (pat) == PARALLEL)
+            {
+              /* Record the insn in the candidate map.  */
+              current_convert_list.safe_push (insn);
+              converting_map.put (insn, pat);
+            }
+          /* If the insn clobbers flags but has no nf_attr,
+             revoke all previous candidates.  */
+          else if (!get_attr_has_nf (insn)
+                   && reg_set_p (cc, pat)
+                   && GET_CODE (set_of (cc, pat)) == CLOBBER)
+            {
+              for (auto item : current_convert_list)
+                converting_map.remove (item);
+              converting_seq = false;
+            }
+        }
+    }
+
+  /* Scratch vector shared by all rewritten patterns.  Using one
+     reusable vector instead of alloca inside the loop keeps stack
+     usage independent of the number of converted insns.  */
+  auto_vec <rtx, 4> new_elems;
+
+  for (auto iter = converting_map.begin ();
+       iter != converting_map.end (); ++iter)
+    {
+      rtx_insn *replace = (*iter).first;
+      rtx pat = (*iter).second;
+      int len = XVECLEN (pat, 0);
+
+      /* Keep every element except a CLOBBER of the flags register.  */
+      new_elems.truncate (0);
+      for (int i = 0; i < len; i++)
+        {
+          rtx temp = XVECEXP (pat, 0, i);
+          if (! (GET_CODE (temp) == CLOBBER
+                 && reg_overlap_mentioned_p (cc, XEXP (temp, 0))))
+            new_elems.safe_push (temp);
+        }
+
+      int n = new_elems.length ();
+      rtx new_pat;
+      /* A single remaining element is used directly rather than being
+         wrapped in a one-element PARALLEL.  */
+      if (n == 1)
+        new_pat = new_elems[0];
+      else
+        new_pat = gen_rtx_PARALLEL (VOIDmode,
+                                    gen_rtvec_v (n, new_elems.address ()));
+
+      rtx old_pat = PATTERN (replace);
+      int old_code = INSN_CODE (replace);
+
+      PATTERN (replace) = new_pat;
+      INSN_CODE (replace) = -1;
+      if (recog_memoized (replace) < 0)
+        {
+          /* Nothing matches the stripped pattern; put the original
+             insn back instead of leaving an unrecognizable one in
+             the insn stream.  */
+          PATTERN (replace) = old_pat;
+          INSN_CODE (replace) = old_code;
+          continue;
+        }
+      df_insn_rescan (replace);
+    }
+
+  timevar_pop (TV_MACH_DEP);
+  return 0;
+}
+
+
+namespace {
+
+/* Descriptor of the "apx_nfcvt" RTL pass: no optinfo group, timed
+   under TV_MACH_DEP, with no required, provided or destroyed
+   properties and no TODO flags.  */
+const pass_data pass_data_apx_nf_convert =
+{
+  RTL_PASS,        /* type */
+  "apx_nfcvt",     /* name */
+  OPTGROUP_NONE,   /* optinfo_flags */
+  TV_MACH_DEP,     /* tv_id */
+  0,               /* properties_required */
+  0,               /* properties_provided */
+  0,               /* properties_destroyed */
+  0,               /* todo_flags_start */
+  0,               /* todo_flags_finish */
+};
+
+/* RTL pass wrapper around ix86_apx_nf_convert.  */
+class pass_apx_nf_convert : public rtl_opt_pass
+{
+public:
+  pass_apx_nf_convert (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_apx_nf_convert, ctxt)
+  {}
+
+  /* opt_pass methods: */
+
+  /* Run only when TARGET_APX_NF is set, optimization is enabled and
+     the current function is optimized for speed.  */
+  bool gate (function *) final override
+    {
+      if (!TARGET_APX_NF || !optimize)
+        return false;
+
+      return optimize_function_for_speed_p (cfun);
+    }
+
+  /* Perform the conversion and hand back its result.  */
+  unsigned int execute (function *) final override
+    {
+      return ix86_apx_nf_convert ();
+    }
+}; // class pass_apx_nf_convert
+
+} // anon namespace
+
+/* Create a new instance of the APX NF conversion pass for context
+   CTXT and return it.  */
+
+rtl_opt_pass *
+make_pass_apx_nf_convert (gcc::context *ctxt)
+{
+  rtl_opt_pass *pass = new pass_apx_nf_convert (ctxt);
+  return pass;
+}
+
+
/* This compares the priority of target features in function DECL1
and DECL2. It returns positive value if DECL1 is higher priority,
negative value if DECL2 is higher priority and 0 if they are the
(define_attr "preferred_for_size" "" (const_int 1))
(define_attr "preferred_for_speed" "" (const_int 1))
+;; Attribute marking whether an insn has an nf variant: "1" if it
+;; does, "0" (the default) otherwise.
+(define_attr "has_nf" "0,1" (const_string "0"))
+
;; Describe a user's asm statement.
(define_asm_attributes
[(set_attr "length" "128")
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
(const_string "1")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
;; It may seem that nonimmediate operand is proper one for operand 1.
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
(const_string "1")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "HI,HI,HI,SI,HI,HI")])
(define_insn "*addqi_1<nf_name>"
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
(const_string "1")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "QI,QI,QI,SI,SI,SI,QI,QI")
;; Potential partial reg stall on alternatives 3 and 4.
(set (attr "preferred_for_speed")
<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
(set_attr "type" "alu")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
(define_insn "*subsi_1_zext"
(match_test "<MODE>mode == HImode")
(const_string "double")
(const_string "direct")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
(define_insn "*imulhi<mode>zu<nf_name>"
(const_string "direct")))
(set_attr "amdfam10_decode" "direct")
(set_attr "bdver1_decode" "direct")
+ (set_attr "has_nf" "1")
(set_attr "mode" "QI")])
;; Multiply with jump on overflow.
""
"<sgnprefix>div{<imodesuffix>}\t%3"
[(set_attr "type" "idiv")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
(define_insn "*<u>divmodsi4_noext_zext_1"
&& <nf_condition>"
"<nf_prefix><sgnprefix>div{b}\t%2"
[(set_attr "type" "idiv")
+ (set_attr "has_nf" "1")
(set_attr "mode" "QI")])
;; We cannot use div/idiv for double division, because it causes
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,DI,SI,DI")])
(define_insn_and_split "*anddi_1_btr"
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
(define_insn "*andqi_1<nf_name>"
#"
[(set_attr "type" "alu,alu,alu,alu,alu,msklog")
(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*")
+ (set_attr "has_nf" "1")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
#"
[(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>")
(set_attr "type" "alu,alu, alu, alu, alu, alu, msklog")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*notxor<mode>_1"
#"
[(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
(set_attr "type" "alu,alu,alu,alu,alu,msklog")
+ (set_attr "has_nf" "1")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
<nf_prefix>neg{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "negnot")
(set_attr "isa" "*,apx_ndd")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
(define_insn "*negsi_1_zext"
"<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "DI")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "vector")
"TARGET_APX_NDD && <nf_condition>"
"<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
+ (set_attr "has_nf" "1")
(set_attr "mode" "DI")])
(define_insn "x86_64_shld_1<nf_name>"
"<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "DI")
(set_attr "length_immediate" "1")
(set_attr "athlon_decode" "vector")
&& <nf_condition>"
"<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
+ (set_attr "has_nf" "1")
(set_attr "mode" "DI")
(set_attr "length_immediate" "1")])
emit_move_insn (operands[0], tmp);
}
DONE;
-})
+}
+ [(set_attr "has_nf" "1")])
(define_insn_and_split "*x86_64_shld_2"
[(set (match_operand:DI 0 "nonimmediate_operand")
"<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
"TARGET_APX_NDD && <nf_condition>"
"<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
+ (set_attr "has_nf" "1")
(set_attr "mode" "SI")])
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "length_immediate" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
"<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "length_immediate" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "SI")])
(define_insn_and_split "*x86_shld_shrd_1_nozext_nf"
emit_move_insn (operands[0], tmp);
}
DONE;
-})
+}
+ [(set_attr "has_nf" "1")])
(define_insn_and_split "*x86_shld_2"
[(set (match_operand:SI 0 "nonimmediate_operand")
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
;; Convert shift to the shiftx pattern to avoid flags dependency.
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "HI,SI,HI,HI")])
(define_insn "*ashlqi3_1<nf_name>"
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "QI,SI,SI,QI,QI")
;; Potential partial reg stall on alternative 1.
(set (attr "preferred_for_speed")
operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2]));
if (!rtx_equal_p (operands[0], operands[1]))
emit_move_insn (operands[0], operands[1]);
-})
+}
+ [(set_attr "has_nf" "1")])
(define_insn "x86_64_shrd<nf_name>"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
"<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "DI")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "vector")
"TARGET_APX_NDD && <nf_condition>"
"<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
+ (set_attr "has_nf" "1")
(set_attr "mode" "DI")])
(define_insn "x86_64_shrd_1<nf_name>"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "length_immediate" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "DI")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "vector")
"<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "length_immediate" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "DI")])
(define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf"
emit_move_insn (operands[0], tmp);
}
DONE;
-})
+}
+ [(set_attr "has_nf" "1")])
(define_insn_and_split "*x86_64_shrd_2"
[(set (match_operand:DI 0 "nonimmediate_operand")
"<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
"TARGET_APX_NDD && <nf_condition>"
"<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
+ (set_attr "has_nf" "1")
(set_attr "mode" "SI")])
(define_insn "x86_shrd_1<nf_name>"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "length_immediate" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
"<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "length_immediate" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "SI")])
(define_insn_and_split "*x86_shrd_shld_1_nozext_nf"
emit_move_insn (operands[0], tmp);
}
DONE;
-})
+}
+ [(set_attr "has_nf" "1")])
(define_insn_and_split "*x86_shrd_2"
[(set (match_operand:SI 0 "nonimmediate_operand")
(set_attr "prefix_0f" "0,*,*")
(set_attr "length_immediate" "0,*,*")
(set_attr "modrm" "0,1,1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
(define_insn "*ashrsi3_cvt_zext"
(match_test "optimize_function_for_size_p (cfun)")))
(const_string "0")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
;; Specialization of *lshr<mode>3_1 below, extracting the SImode
(match_test "optimize_function_for_size_p (cfun)")))
(const_string "0")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
;; Convert shift to the shiftx pattern to avoid flags dependency.
(match_test "optimize_function_for_size_p (cfun)")))
(const_string "0")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
(define_insn "*lshrqi3_1<nf_name>"
(match_test "optimize_function_for_size_p (cfun)")))
(const_string "0")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "QI")])
(define_insn "*lshrhi3_1<nf_name>"
(match_test "optimize_function_for_size_p (cfun)")))
(const_string "0")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "HI")])
;; Alternative 1 is needed to work around LRA limitation, see PR82524.
(match_test "optimize_function_for_size_p (cfun)"))))
(const_string "0")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
;; Convert rotate to the rotatex pattern to avoid flags dependency.
(match_test "optimize_function_for_size_p (cfun)")))
(const_string "0")
(const_string "*")))
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
;; Alternative 1 is needed to work around LRA limitation, see PR82524.
"ix86_expand_clear (operands[0]);"
[(set_attr "prefix_rep" "1")
(set_attr "type" "bitmanip")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
; False dependency happens when destination is only updated by tzcnt,
"lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "prefix_rep" "1")
(set_attr "type" "bitmanip")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*clzsi2_lzcnt_zext"
[(set_attr "type" "<lt_zcnt_type>")
(set_attr "prefix_0f" "1")
(set_attr "prefix_rep" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
; False dependency happens when destination is only updated by tzcnt,
[(set_attr "type" "<lt_zcnt_type>")
(set_attr "prefix_0f" "1")
(set_attr "prefix_rep" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
(define_insn "<lt_zcnt>_hi<nf_name>"
[(set_attr "type" "<lt_zcnt_type>")
(set_attr "prefix_0f" "1")
(set_attr "prefix_rep" "1")
+ (set_attr "has_nf" "1")
(set_attr "mode" "HI")])
;; BMI instructions.
"ix86_expand_clear (operands[0]);"
[(set_attr "prefix_rep" "1")
(set_attr "type" "bitmanip")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
; False dependency happens when destination is only updated by tzcnt,
}
[(set_attr "prefix_rep" "1")
(set_attr "type" "bitmanip")
+ (set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*popcountsi2_zext"
}
[(set_attr "prefix_rep" "1")
(set_attr "type" "bitmanip")
+ (set_attr "has_nf" "1")
(set_attr "mode" "HI")])
(define_expand "bswapdi2"