From 351fa55c58a036f148d13bca972e687a0bacd113 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Thu, 12 Jun 2025 16:58:33 +0000 Subject: [PATCH] amdgcn: add DImode offsets for gather/scatter Add new variants of the gather_load and scatter_store instructions that take the offsets in DImode. This is not the natural width for offsets in the instruction set, but we can use them to compute a vector of absolute addresses, which does work. This enables the autovectorizer to use gather/scatter in a number of additional scenarios (one of which shows up in the SPEC HPC lbm benchmark). gcc/ChangeLog: * config/gcn/gcn-valu.md (gather_load): New. (scatter_store): New. (mask_gather_load): New. (mask_scatter_store): New. * config/gcn/gcn.cc (gcn_expand_scaled_offsets): Support DImode. --- gcc/config/gcn/gcn-valu.md | 81 ++++++++++++++++++++++++++++++++++++++ gcc/config/gcn/gcn.cc | 34 ++++++++++------ 2 files changed, 103 insertions(+), 12 deletions(-) diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index dfa6b1523bd..3899117f271 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -1133,6 +1133,23 @@ DONE; }) +(define_expand "gather_load" + [(match_operand:V_MOV 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand: 2 "register_operand") + (match_operand 3 "immediate_operand") + (match_operand:SI 4 "gcn_alu_operand")] + "" + { + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], + operands[2], operands[4], + INTVAL (operands[3]), NULL); + + emit_insn (gen_gather_insn_1offset (operands[0], addr, const0_rtx, + const0_rtx, const0_rtx)); + DONE; + }) + ; Allow any address expression (define_expand "gather_expr" [(set (match_operand:V_MOV 0 "register_operand") @@ -1259,6 +1276,23 @@ DONE; }) +(define_expand "scatter_store" + [(match_operand:DI 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:SI 3 "gcn_alu_operand") + 
(match_operand:V_MOV 4 "register_operand")] + "" + { + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], + operands[1], operands[3], + INTVAL (operands[2]), NULL); + + emit_insn (gen_scatter_insn_1offset (addr, const0_rtx, operands[4], + const0_rtx, const0_rtx)); + DONE; + }) + ; Allow any address expression (define_expand "scatter_expr" [(set (mem:BLK (scratch)) @@ -4222,6 +4256,32 @@ DONE; }) +(define_expand "mask_gather_load" + [(set:V_MOV (match_operand:V_MOV 0 "register_operand") + (unspec:V_MOV + [(match_operand:DI 1 "register_operand") + (match_operand: 2 "register_operand") + (match_operand 3 "immediate_operand") + (match_operand:SI 4 "gcn_alu_operand") + (match_operand:DI 5 "") + (match_operand:V_MOV 6 "maskload_else_operand")] + UNSPEC_GATHER))] + "" + { + rtx exec = force_reg (DImode, operands[5]); + + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], + operands[2], operands[4], + INTVAL (operands[3]), exec); + + emit_insn (gen_gather_insn_1offset_exec (operands[0], addr, + const0_rtx, const0_rtx, + const0_rtx, + gcn_gen_undef (mode), + exec)); + DONE; + }) + (define_expand "mask_scatter_store" [(match_operand:DI 0 "register_operand") (match_operand: 1 "register_operand") @@ -4250,6 +4310,27 @@ DONE; }) +(define_expand "mask_scatter_store" + [(match_operand:DI 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:SI 3 "gcn_alu_operand") + (match_operand:V_MOV 4 "register_operand") + (match_operand:DI 5 "")] + "" + { + rtx exec = force_reg (DImode, operands[5]); + + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], + operands[1], operands[3], + INTVAL (operands[2]), exec); + + emit_insn (gen_scatter_insn_1offset_exec (addr, const0_rtx, + operands[4], const0_rtx, + const0_rtx, exec)); + DONE; + }) + (define_code_iterator cond_op [plus minus mult]) (define_expand "cond_" diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc 
index 81a8578cf5d..3b26d5c6a58 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -2307,36 +2307,46 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, Return values. ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses. - ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */ + ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. + 64-bit offsets - return VnDImode vector of absolute addresses. */ rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale, bool unsigned_p, rtx exec) { int vf = GET_MODE_NUNITS (GET_MODE (offsets)); - rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode)); - rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode)); + rtx scaled_offsets = gen_reg_rtx (GET_MODE (offsets)); + rtx abs_addr = gen_reg_rtx (VnMODE (vf, DImode)); + bool use_di = GET_MODE_INNER (GET_MODE (scaled_offsets)) == DImode; if (CONST_INT_P (scale) && INTVAL (scale) > 0 && exact_log2 (INTVAL (scale)) >= 0) - emit_insn (gen_ashlvNsi3 (tmpsi, offsets, - GEN_INT (exact_log2 (INTVAL (scale))), - NULL, exec)); + emit_insn (gen_ashlvNm3 (scaled_offsets, offsets, + GEN_INT (exact_log2 (INTVAL (scale))), + NULL, exec)); else - emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec)); + emit_insn (gen_mulvNm3_dup (scaled_offsets, scale, offsets, NULL, exec)); + /* No instructions support DImode offsets. */ + if (use_di) + { + emit_insn (gen_addvNdi3_dup (abs_addr, base, scaled_offsets, NULL, exec)); + return abs_addr; + } /* "Global" instructions do not support negative register offsets. 
*/ - if (as == ADDR_SPACE_FLAT || !unsigned_p) + else if (as == ADDR_SPACE_FLAT || !unsigned_p) { if (unsigned_p) - emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec)); + emit_insn (gen_addvNdi3_zext_dup2 (abs_addr, scaled_offsets, base, + NULL, exec)); else - emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec)); - return tmpdi; + emit_insn (gen_addvNdi3_sext_dup2 (abs_addr, scaled_offsets, base, + NULL, exec)); + return abs_addr; } else if (as == ADDR_SPACE_GLOBAL) - return tmpsi; + return scaled_offsets; gcc_unreachable (); } -- 2.47.2