]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
amdgcn: add DImode offsets for gather/scatter
authorAndrew Stubbs <ams@baylibre.com>
Thu, 12 Jun 2025 16:58:33 +0000 (16:58 +0000)
committerAndrew Stubbs <ams@baylibre.com>
Mon, 21 Jul 2025 11:13:20 +0000 (11:13 +0000)
Add new variant of he gather_load and scatter_store instructions that take the
offsets in DImode.  This is not the natural width for offsets in the
instruction set, but we can use them to compute a vector of absolute addresses,
which does work.

This enables the autovectorizer to use gather/scatter in a number of additional
scenarios (one of which shows up in the SPEC HPC lbm benchmark).

gcc/ChangeLog:

* config/gcn/gcn-valu.md (gather_load<mode><vndi>): New.
(scatter_store<mode><vndi>): New.
(mask_gather_load<mode><vndi>): New.
(mask_scatter_store<mode><vndi>): New.
* config/gcn/gcn.cc (gcn_expand_scaled_offsets): Support DImode.

gcc/config/gcn/gcn-valu.md
gcc/config/gcn/gcn.cc

index dfa6b1523bd78a6b556e4229b31a3150bb5a0875..3899117f2719a2eceedbecfefbb101c68a7ab3a7 100644 (file)
     DONE;
   })
 
+(define_expand "gather_load<mode><vndi>"
+  [(match_operand:V_MOV 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand:<VnDI> 2 "register_operand")
+   (match_operand 3 "immediate_operand")
+   (match_operand:SI 4 "gcn_alu_operand")]
+  ""
+  {
+    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+                                         operands[2], operands[4],
+                                         INTVAL (operands[3]), NULL);
+
+    emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+                                             const0_rtx, const0_rtx));
+    DONE;
+  })
+
 ; Allow any address expression
 (define_expand "gather<mode>_expr<exec>"
   [(set (match_operand:V_MOV 0 "register_operand")
     DONE;
   })
 
+(define_expand "scatter_store<mode><vndi>"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:<VnDI> 1 "register_operand")
+   (match_operand 2 "immediate_operand")
+   (match_operand:SI 3 "gcn_alu_operand")
+   (match_operand:V_MOV 4 "register_operand")]
+  ""
+  {
+    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+                                         operands[1], operands[3],
+                                         INTVAL (operands[2]), NULL);
+
+    emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+                                              const0_rtx, const0_rtx));
+    DONE;
+  })
+
 ; Allow any address expression
 (define_expand "scatter<mode>_expr<exec_scatter>"
   [(set (mem:BLK (scratch))
     DONE;
   })
 
+(define_expand "mask_gather_load<mode><vndi>"
+  [(set:V_MOV (match_operand:V_MOV 0 "register_operand")
+             (unspec:V_MOV
+               [(match_operand:DI 1 "register_operand")
+                (match_operand:<VnDI> 2 "register_operand")
+                (match_operand 3 "immediate_operand")
+                (match_operand:SI 4 "gcn_alu_operand")
+                (match_operand:DI 5 "")
+                (match_operand:V_MOV 6 "maskload_else_operand")]
+               UNSPEC_GATHER))]
+  ""
+  {
+    rtx exec = force_reg (DImode, operands[5]);
+
+    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+                                         operands[2], operands[4],
+                                         INTVAL (operands[3]), exec);
+
+    emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+                                                  const0_rtx, const0_rtx,
+                                                  const0_rtx,
+                                                  gcn_gen_undef (<MODE>mode),
+                                                  exec));
+    DONE;
+  })
+
 (define_expand "mask_scatter_store<mode><vnsi>"
   [(match_operand:DI 0 "register_operand")
    (match_operand:<VnSI> 1 "register_operand")
     DONE;
   })
 
+(define_expand "mask_scatter_store<mode><vndi>"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:<VnDI> 1 "register_operand")
+   (match_operand 2 "immediate_operand")
+   (match_operand:SI 3 "gcn_alu_operand")
+   (match_operand:V_MOV 4 "register_operand")
+   (match_operand:DI 5 "")]
+  ""
+  {
+    rtx exec = force_reg (DImode, operands[5]);
+
+    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+                                         operands[1], operands[3],
+                                         INTVAL (operands[2]), exec);
+
+    emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+                                                   operands[4], const0_rtx,
+                                                   const0_rtx, exec));
+    DONE;
+  })
+
 (define_code_iterator cond_op [plus minus mult])
 
 (define_expand "cond_<expander><mode>"
index 81a8578cf5d1562e294124bc43f2b88445cde416..3b26d5c6a58521fac4388dbd42b4da59dd77dd5e 100644 (file)
@@ -2307,36 +2307,46 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
 
    Return values.
      ADDR_SPACE_FLAT   - return VnDImode vector of absolute addresses.
-     ADDR_SPACE_GLOBAL - return VnSImode vector of offsets.  */
+     ADDR_SPACE_GLOBAL - return VnSImode vector of offsets.
+     64-bit offsets    - return VnDImode vector of absolute addresses. */
 
 rtx
 gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
                           bool unsigned_p, rtx exec)
 {
   int vf = GET_MODE_NUNITS (GET_MODE (offsets));
-  rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode));
-  rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode));
+  rtx scaled_offsets = gen_reg_rtx (GET_MODE (offsets));
+  rtx abs_addr = gen_reg_rtx (VnMODE (vf, DImode));
+  bool use_di = GET_MODE_INNER (GET_MODE (scaled_offsets)) == DImode;
 
   if (CONST_INT_P (scale)
       && INTVAL (scale) > 0
       && exact_log2 (INTVAL (scale)) >= 0)
-    emit_insn (gen_ashlvNsi3 (tmpsi, offsets,
-                             GEN_INT (exact_log2 (INTVAL (scale))),
-                             NULL, exec));
+    emit_insn (gen_ashlvNm3 (scaled_offsets, offsets,
+                            GEN_INT (exact_log2 (INTVAL (scale))),
+                            NULL, exec));
   else
-     emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec));
+     emit_insn (gen_mulvNm3_dup (scaled_offsets, scale, offsets, NULL, exec));
 
+  /* No instructions support DImode offsets.  */
+  if (use_di)
+    {
+      emit_insn (gen_addvNdi3_dup (abs_addr, base, scaled_offsets, NULL, exec));
+      return abs_addr;
+    }
   /* "Global" instructions do not support negative register offsets.  */
-  if (as == ADDR_SPACE_FLAT || !unsigned_p)
+  else if (as == ADDR_SPACE_FLAT || !unsigned_p)
     {
       if (unsigned_p)
-        emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec));
+       emit_insn (gen_addvNdi3_zext_dup2 (abs_addr, scaled_offsets, base,
+                                          NULL, exec));
       else
-        emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec));
-      return tmpdi;
+       emit_insn (gen_addvNdi3_sext_dup2 (abs_addr, scaled_offsets, base,
+                                          NULL, exec));
+      return abs_addr;
     }
   else if (as == ADDR_SPACE_GLOBAL)
-    return tmpsi;
+    return scaled_offsets;
 
   gcc_unreachable ();
 }