From 47cfa2d657edb9eddc8836059f02d81cbebad2e5 Mon Sep 17 00:00:00 2001
From: Andrew Stubbs
Date: Mon, 6 Mar 2023 12:42:44 +0000
Subject: [PATCH] amdgcn: gather/scatter with DImode offsets

The GPU architecture only accepts SImode offsets on gather/scatter
instructions, but the same instructions can also take a vector of
absolute DImode addresses.  Expanding the base address plus DImode
offsets into such an address vector therefore allows gather/scatter to
be used in more situations.

gcc/ChangeLog:

	* config/gcn/gcn-valu.md (gather_load<mode><vndi>): New.
	(scatter_store<mode><vndi>): New.
	(mask_gather_load<mode><vndi>): New.
	(mask_scatter_store<mode><vndi>): New.
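Purely as an illustration (this loop is a hypothetical example, not
taken from the patch or the GCC testsuite), the kind of access that
benefits is an indexed load whose indices are 64-bit, where the
SImode-offset gather patterns alone would not apply:

    /* Hypothetical example (not from the patch): idx[] holds 64-bit
       indices, so each lane's byte offset is DImode.  The new expanders
       duplicate the base address, multiply the offsets by the scale,
       and add them to form a vector of absolute addresses.  */
    void
    gather_f64 (double *restrict dst, const double *restrict src,
                const long long *restrict idx, int n)
    {
      for (int i = 0; i < n; i++)
        dst[i] = src[idx[i]];
    }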
4 "gcn_alu_operand") + (match_operand:DI 5 "")] + "" + { + rtx vec_base = gen_reg_rtx (mode); + rtx addr = gen_reg_rtx (mode); + rtx multiplier = gen_reg_rtx (mode); + rtx offsets = gen_reg_rtx (mode); + rtx exec = force_reg (DImode, operands[5]); + + if (CONST_INT_P (operands[4]) && INTVAL (operands[4]) != 1) + { + emit_insn (gen_vec_duplicate (multiplier, operands[4])); + emit_insn (gen_mul3 (offsets, operands[2], multiplier)); + } + else + offsets = operands[2]; + emit_insn (gen_vec_duplicate (vec_base, operands[1])); + emit_insn (gen_add3 (addr, vec_base, offsets)); + + /* Masked lanes are required to hold zero. */ + emit_move_insn (operands[0], gcn_vec_constant (mode, 0)); + + emit_insn (gen_gather_insn_1offset_exec (operands[0], addr, + const0_rtx, const0_rtx, + const0_rtx, operands[0], + exec)); + DONE; + }) + (define_expand "mask_gather_load" [(match_operand:V_ALL 0 "register_operand") (match_operand:DI 1 "register_operand") @@ -3559,6 +3650,38 @@ DONE; }) +(define_expand "mask_scatter_store" + [(match_operand:DI 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:DI 3 "gcn_alu_operand") + (match_operand:V_ALL 4 "register_operand") + (match_operand:DI 5 "")] + "" + { + rtx vec_base = gen_reg_rtx (mode); + rtx addr = gen_reg_rtx (mode); + rtx multiplier = gen_reg_rtx (mode); + rtx offsets = gen_reg_rtx (mode); + rtx exec = force_reg (DImode, operands[5]); + + if (CONST_INT_P (operands[3]) && INTVAL (operands[3]) != 1) + { + emit_insn (gen_vec_duplicate (multiplier, operands[3])); + emit_insn (gen_mul3 (offsets, operands[1], multiplier)); + } + else + offsets = operands[1]; + emit_insn (gen_vec_duplicate (vec_base, operands[0])); + emit_insn (gen_add3 (addr, vec_base, offsets)); + + emit_insn (gen_scatter_insn_1offset_exec (addr, const0_rtx, + operands[4], const0_rtx, + const0_rtx, + exec)); + DONE; + }) + (define_expand "mask_scatter_store" [(match_operand:DI 0 "register_operand") (match_operand: 1 "register_operand") -- 2.47.2