From b8806f6ffbe72ed8fb7aba3b3a9196ec4d13e614 Mon Sep 17 00:00:00 2001 From: Ju-Zhe Zhong Date: Mon, 3 Jul 2023 17:10:26 +0800 Subject: [PATCH] Middle-end: Change order of LEN_MASK_LOAD/LEN_MASK_STORE arguments Hi, Richard. I fix the order as you suggeted. Before this patch, the order is {len,mask,bias}. Now, after this patch, the order becomes {len,bias,mask}. Since you said we should not need 'internal_fn_bias_index', the bias index should always be the len index + 1. I notice LEN_STORE order is {len,vector,bias}, to make them consistent, I reorder into LEN_STORE {len,bias,vector}. Just like MASK_STORE {mask,vector}. Ok for trunk ? gcc/ChangeLog: * config/riscv/autovec.md: Change order of LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments. * config/riscv/riscv-v.cc (expand_load_store): Ditto. * doc/md.texi: Ditto. * gimple-fold.cc (gimple_fold_partial_load_store_mem_ref): Ditto. * internal-fn.cc (len_maskload_direct): Ditto. (len_maskstore_direct): Ditto. (add_len_and_mask_args): New function. (expand_partial_load_optab_fn): Change order of LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments. (expand_partial_store_optab_fn): Ditto. (internal_fn_len_index): New function. (internal_fn_mask_index): Change order of LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments. (internal_fn_stored_value_index): Ditto. (internal_len_load_store_bias): Ditto. * internal-fn.h (internal_fn_len_index): New function. * tree-ssa-dse.cc (initialize_ao_ref_for_dse): Change order of LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments. * tree-vect-stmts.cc (vectorizable_store): Ditto. (vectorizable_load): Ditto. --- gcc/config/riscv/autovec.md | 8 +- gcc/config/riscv/riscv-v.cc | 2 +- gcc/doc/md.texi | 16 ++-- gcc/gimple-fold.cc | 8 +- gcc/internal-fn.cc | 156 ++++++++++++++++++------------------ gcc/internal-fn.h | 1 + gcc/tree-ssa-dse.cc | 11 +-- gcc/tree-vect-stmts.cc | 11 +-- 8 files changed, 107 insertions(+), 106 deletions(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 1488f2be1be5..4ab0e9f99eba 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -26,8 +26,8 @@ [(match_operand:V 0 "register_operand") (match_operand:V 1 "memory_operand") (match_operand 2 "autovec_length_operand") - (match_operand: 3 "vector_mask_operand") - (match_operand 4 "const_0_operand")] + (match_operand 3 "const_0_operand") + (match_operand: 4 "vector_mask_operand")] "TARGET_VECTOR" { riscv_vector::expand_load_store (operands, true); @@ -38,8 +38,8 @@ [(match_operand:V 0 "memory_operand") (match_operand:V 1 "register_operand") (match_operand 2 "autovec_length_operand") - (match_operand: 3 "vector_mask_operand") - (match_operand 4 "const_0_operand")] + (match_operand 3 "const_0_operand") + (match_operand: 4 "vector_mask_operand")] "TARGET_VECTOR" { riscv_vector::expand_load_store (operands, false); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index adb8d7d36a58..8d5bed7ebe43 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2777,7 +2777,7 @@ expand_load_store (rtx *ops, bool is_load) { poly_int64 value; rtx len = ops[2]; - rtx mask = ops[3]; + rtx mask = ops[4]; machine_mode mode = GET_MODE (ops[0]); if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode))) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index cefdee84821a..5e5482265cdf 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5302,15 +5302,15 @@ This pattern is not allowed to @code{FAIL}. @cindex @code{len_maskload@var{m}@var{n}} instruction pattern @item @samp{len_maskload@var{m}@var{n}} Perform a masked load from the memory location pointed to by operand 1 -into register operand 0. (operand 2 + operand 4) elements are loaded from +into register operand 0. (operand 2 + operand 3) elements are loaded from memory and other elements in operand 0 are set to undefined values. This is a combination of len_load and maskload. Operands 0 and 1 have mode @var{m}, which must be a vector mode. Operand 2 has whichever integer mode the target prefers. A mask is specified in -operand 3 which must be of type @var{n}. The mask has lower precedence than +operand 4 which must be of type @var{n}. The mask has lower precedence than the length and is itself subject to length masking, -i.e. only mask indices < (operand 2 + operand 4) are used. -Operand 4 conceptually has mode @code{QI}. +i.e. only mask indices < (operand 2 + operand 3) are used. +Operand 3 conceptually has mode @code{QI}. Operand 2 can be a variable or a constant amount. Operand 4 specifies a constant bias: it is either a constant 0 or a constant -1. The predicate on @@ -5329,14 +5329,14 @@ This pattern is not allowed to @code{FAIL}. @cindex @code{len_maskstore@var{m}@var{n}} instruction pattern @item @samp{len_maskstore@var{m}@var{n}} Perform a masked store from vector register operand 1 into memory operand 0. -(operand 2 + operand 4) elements are stored to memory +(operand 2 + operand 3) elements are stored to memory and leave the other elements of operand 0 unchanged. This is a combination of len_store and maskstore. Operands 0 and 1 have mode @var{m}, which must be a vector mode. Operand 2 has whichever -integer mode the target prefers. A mask is specified in operand 3 which must be +integer mode the target prefers. A mask is specified in operand 4 which must be of type @var{n}. The mask has lower precedence than the length and is itself subject to -length masking, i.e. only mask indices < (operand 2 + operand 4) are used. -Operand 4 conceptually has mode @code{QI}. +length masking, i.e. only mask indices < (operand 2 + operand 3) are used. +Operand 3 conceptually has mode @code{QI}. Operand 2 can be a variable or a constant amount. Operand 3 specifies a constant bias: it is either a constant 0 or a constant -1. The predicate on diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 8434274f69db..4027ff71e103 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -5391,11 +5391,12 @@ gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool mask_p) } else { - tree basic_len = gimple_call_arg (call, 2); + internal_fn ifn = gimple_call_internal_fn (call); + int len_index = internal_fn_len_index (ifn); + tree basic_len = gimple_call_arg (call, len_index); if (!poly_int_tree_p (basic_len)) return NULL_TREE; - unsigned int nargs = gimple_call_num_args (call); - tree bias = gimple_call_arg (call, nargs - 1); + tree bias = gimple_call_arg (call, len_index + 1); gcc_assert (TREE_CODE (bias) == INTEGER_CST); /* For LEN_LOAD/LEN_STORE/LEN_MASK_LOAD/LEN_MASK_STORE, we don't fold when (bias + len) != VF. */ @@ -5405,7 +5406,6 @@ gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool mask_p) /* For LEN_MASK_{LOAD,STORE}, we should also check whether the mask is all ones mask. */ - internal_fn ifn = gimple_call_internal_fn (call); if (ifn == IFN_LEN_MASK_LOAD || ifn == IFN_LEN_MASK_STORE) { tree mask = gimple_call_arg (call, internal_fn_mask_index (ifn)); diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 9017176dc7a5..c1fcb38b17bf 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -165,7 +165,7 @@ init_internal_fns () #define mask_load_lanes_direct { -1, -1, false } #define gather_load_direct { 3, 1, false } #define len_load_direct { -1, -1, false } -#define len_maskload_direct { -1, 3, false } +#define len_maskload_direct { -1, 4, false } #define mask_store_direct { 3, 2, false } #define store_lanes_direct { 0, 0, false } #define mask_store_lanes_direct { 0, 0, false } @@ -173,7 +173,7 @@ init_internal_fns () #define vec_cond_direct { 2, 0, false } #define scatter_store_direct { 3, 1, false } #define len_store_direct { 3, 3, false } -#define len_maskstore_direct { 4, 3, false } +#define len_maskstore_direct { 4, 5, false } #define vec_set_direct { 3, 3, false } #define unary_direct { 0, 0, true } #define unary_convert_direct { -1, 0, true } @@ -293,6 +293,38 @@ get_multi_vector_move (tree array_type, convert_optab optab) return convert_optab_handler (optab, imode, vmode); } +/* Add len and mask arguments according to the STMT. */ + +static unsigned int +add_len_and_mask_args (expand_operand *ops, unsigned int opno, gcall *stmt) +{ + internal_fn ifn = gimple_call_internal_fn (stmt); + int len_index = internal_fn_len_index (ifn); + /* BIAS is always consecutive next of LEN. */ + int bias_index = len_index + 1; + int mask_index = internal_fn_mask_index (ifn); + /* The order of arguments are always {len,bias,mask}. */ + if (len_index >= 0) + { + tree len = gimple_call_arg (stmt, len_index); + rtx len_rtx = expand_normal (len); + create_convert_operand_from (&ops[opno++], len_rtx, + TYPE_MODE (TREE_TYPE (len)), + TYPE_UNSIGNED (TREE_TYPE (len))); + tree biast = gimple_call_arg (stmt, bias_index); + rtx bias = expand_normal (biast); + create_input_operand (&ops[opno++], bias, QImode); + } + if (mask_index >= 0) + { + tree mask = gimple_call_arg (stmt, mask_index); + rtx mask_rtx = expand_normal (mask); + create_input_operand (&ops[opno++], mask_rtx, + TYPE_MODE (TREE_TYPE (mask))); + } + return opno; +} + /* Expand LOAD_LANES call STMT using optab OPTAB. */ static void @@ -2879,14 +2911,15 @@ expand_call_mem_ref (tree type, gcall *stmt, int index) * OPTAB. */ static void -expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab) +expand_partial_load_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab) { + int i = 0; class expand_operand ops[5]; - tree type, lhs, rhs, maskt, biast; - rtx mem, target, mask, bias; + tree type, lhs, rhs, maskt; + rtx mem, target; insn_code icode; - maskt = gimple_call_arg (stmt, 2); + maskt = gimple_call_arg (stmt, internal_fn_mask_index (ifn)); lhs = gimple_call_lhs (stmt); if (lhs == NULL_TREE) return; @@ -2903,38 +2936,11 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab) mem = expand_expr (rhs, NULL_RTX, VOIDmode, EXPAND_WRITE); gcc_assert (MEM_P (mem)); - mask = expand_normal (maskt); target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); - create_output_operand (&ops[0], target, TYPE_MODE (type)); - create_fixed_operand (&ops[1], mem); - if (optab == len_load_optab) - { - create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)), - TYPE_UNSIGNED (TREE_TYPE (maskt))); - biast = gimple_call_arg (stmt, 3); - bias = expand_normal (biast); - create_input_operand (&ops[3], bias, QImode); - expand_insn (icode, 4, ops); - } - else if (optab == len_maskload_optab) - { - create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)), - TYPE_UNSIGNED (TREE_TYPE (maskt))); - maskt = gimple_call_arg (stmt, 3); - mask = expand_normal (maskt); - create_input_operand (&ops[3], mask, TYPE_MODE (TREE_TYPE (maskt))); - icode = convert_optab_handler (optab, TYPE_MODE (type), - TYPE_MODE (TREE_TYPE (maskt))); - biast = gimple_call_arg (stmt, 4); - bias = expand_normal (biast); - create_input_operand (&ops[4], bias, QImode); - expand_insn (icode, 5, ops); - } - else - { - create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt))); - expand_insn (icode, 3, ops); - } + create_output_operand (&ops[i++], target, TYPE_MODE (type)); + create_fixed_operand (&ops[i++], mem); + i = add_len_and_mask_args (ops, i, stmt); + expand_insn (icode, i, ops); if (!rtx_equal_p (target, ops[0].value)) emit_move_insn (target, ops[0].value); @@ -2951,12 +2957,13 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab) static void expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab) { + int i = 0; class expand_operand ops[5]; - tree type, lhs, rhs, maskt, biast; - rtx mem, reg, mask, bias; + tree type, lhs, rhs, maskt; + rtx mem, reg; insn_code icode; - maskt = gimple_call_arg (stmt, 2); + maskt = gimple_call_arg (stmt, internal_fn_mask_index (ifn)); rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn)); type = TREE_TYPE (rhs); lhs = expand_call_mem_ref (type, stmt, 0); @@ -2971,37 +2978,11 @@ expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab mem = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); gcc_assert (MEM_P (mem)); - mask = expand_normal (maskt); reg = expand_normal (rhs); - create_fixed_operand (&ops[0], mem); - create_input_operand (&ops[1], reg, TYPE_MODE (type)); - if (optab == len_store_optab) - { - create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)), - TYPE_UNSIGNED (TREE_TYPE (maskt))); - biast = gimple_call_arg (stmt, 4); - bias = expand_normal (biast); - create_input_operand (&ops[3], bias, QImode); - expand_insn (icode, 4, ops); - } - else if (optab == len_maskstore_optab) - { - create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)), - TYPE_UNSIGNED (TREE_TYPE (maskt))); - maskt = gimple_call_arg (stmt, 3); - mask = expand_normal (maskt); - create_input_operand (&ops[3], mask, TYPE_MODE (TREE_TYPE (maskt))); - biast = gimple_call_arg (stmt, 5); - bias = expand_normal (biast); - create_input_operand (&ops[4], bias, QImode); - icode = convert_optab_handler (optab, TYPE_MODE (type), GET_MODE (mask)); - expand_insn (icode, 5, ops); - } - else - { - create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt))); - expand_insn (icode, 3, ops); - } + create_fixed_operand (&ops[i++], mem); + create_input_operand (&ops[i++], reg, TYPE_MODE (type)); + i = add_len_and_mask_args (ops, i, stmt); + expand_insn (icode, i, ops); } #define expand_mask_store_optab_fn expand_partial_store_optab_fn @@ -4482,6 +4463,25 @@ internal_gather_scatter_fn_p (internal_fn fn) } } +/* If FN takes a vector len argument, return the index of that argument, + otherwise return -1. */ + +int +internal_fn_len_index (internal_fn fn) +{ + switch (fn) + { + case IFN_LEN_LOAD: + case IFN_LEN_STORE: + case IFN_LEN_MASK_LOAD: + case IFN_LEN_MASK_STORE: + return 2; + + default: + return -1; + } +} + /* If FN takes a vector mask argument, return the index of that argument, otherwise return -1. */ @@ -4498,11 +4498,9 @@ internal_fn_mask_index (internal_fn fn) case IFN_MASK_GATHER_LOAD: case IFN_MASK_SCATTER_STORE: - return 4; - case IFN_LEN_MASK_LOAD: case IFN_LEN_MASK_STORE: - return 3; + return 4; default: return (conditional_internal_fn_code (fn) != ERROR_MARK @@ -4522,12 +4520,14 @@ internal_fn_stored_value_index (internal_fn fn) case IFN_MASK_STORE_LANES: case IFN_SCATTER_STORE: case IFN_MASK_SCATTER_STORE: - case IFN_LEN_STORE: return 3; - case IFN_LEN_MASK_STORE: + case IFN_LEN_STORE: return 4; + case IFN_LEN_MASK_STORE: + return 5; + default: return -1; } @@ -4592,7 +4592,6 @@ internal_len_load_store_bias (internal_fn ifn, machine_mode mode) { optab optab = direct_internal_fn_optab (ifn); insn_code icode = direct_optab_handler (optab, mode); - int bias_opno = 3; if (icode == CODE_FOR_nothing) { @@ -4610,15 +4609,14 @@ internal_len_load_store_bias (internal_fn ifn, machine_mode mode) optab = direct_internal_fn_optab (IFN_LEN_MASK_STORE); } icode = convert_optab_handler (optab, mode, mask_mode); - bias_opno = 4; } if (icode != CODE_FOR_nothing) { /* For now we only support biases of 0 or -1. Try both of them. */ - if (insn_operand_matches (icode, bias_opno, GEN_INT (0))) + if (insn_operand_matches (icode, 3, GEN_INT (0))) return 0; - if (insn_operand_matches (icode, bias_opno, GEN_INT (-1))) + if (insn_operand_matches (icode, 3, GEN_INT (-1))) return -1; } diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index 8f21068e300d..4234bbfed87e 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -234,6 +234,7 @@ extern bool internal_load_fn_p (internal_fn); extern bool internal_store_fn_p (internal_fn); extern bool internal_gather_scatter_fn_p (internal_fn); extern int internal_fn_mask_index (internal_fn); +extern int internal_fn_len_index (internal_fn); extern int internal_fn_stored_value_index (internal_fn); extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree, tree, tree, int); diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc index f8338037a611..9c6004cdce8e 100644 --- a/gcc/tree-ssa-dse.cc +++ b/gcc/tree-ssa-dse.cc @@ -161,12 +161,13 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, bool may_def_ok = false) case IFN_MASK_STORE: case IFN_LEN_MASK_STORE: { - int stored_value_index - = internal_fn_stored_value_index (gimple_call_internal_fn (stmt)); - if (gimple_call_internal_fn (stmt) == IFN_LEN_STORE) + internal_fn ifn = gimple_call_internal_fn (stmt); + int stored_value_index = internal_fn_stored_value_index (ifn); + int len_index = internal_fn_len_index (ifn); + if (ifn == IFN_LEN_STORE) { - tree len = gimple_call_arg (stmt, 2); - tree bias = gimple_call_arg (stmt, 4); + tree len = gimple_call_arg (stmt, len_index); + tree bias = gimple_call_arg (stmt, len_index + 1); if (tree_fits_uhwi_p (len)) { ao_ref_init_from_ptr_and_size (write, diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 68faa8ead39d..a0c39268bf0b 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -9122,13 +9122,14 @@ vectorizable_store (vec_info *vinfo, if (partial_ifn == IFN_LEN_MASK_STORE) call = gimple_build_call_internal (IFN_LEN_MASK_STORE, 6, dataref_ptr, ptr, - final_len, final_mask, - vec_oprnd, bias); + final_len, bias, + final_mask, vec_oprnd); else call = gimple_build_call_internal (IFN_LEN_STORE, 5, - dataref_ptr, ptr, final_len, - vec_oprnd, bias); + dataref_ptr, ptr, + final_len, bias, + vec_oprnd); gimple_call_set_nothrow (call, true); vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); new_stmt = call; @@ -10523,7 +10524,7 @@ vectorizable_load (vec_info *vinfo, call = gimple_build_call_internal (IFN_LEN_MASK_LOAD, 5, dataref_ptr, ptr, final_len, - final_mask, bias); + bias, final_mask); else call = gimple_build_call_internal (IFN_LEN_LOAD, 4, dataref_ptr, ptr, -- 2.47.3