git.ipfire.org Git - thirdparty/gcc.git/commitdiff
amdgcn: Support XNACK mode
author Andrew Stubbs <ams@codesourcery.com>
Mon, 31 Jul 2023 16:52:06 +0000 (17:52 +0100)
committer Andrew Stubbs <ams@codesourcery.com>
Wed, 13 Dec 2023 15:30:42 +0000 (15:30 +0000)
The XNACK feature allows memory load instructions to restart safely following
a page-miss interrupt.  This is useful for shared-memory devices, like APUs,
and to implement OpenMP Unified Shared Memory.

To support the feature we must be able to set the appropriate meta-data and
set the load instructions to early-clobber.  When the port supports scheduling
of s_waitcnt instructions there will be further requirements.

gcc/ChangeLog:

* config/gcn/gcn-hsa.h (NO_XNACK): Ignore missing -march.
(XNACKOPT): Match on/off; ignore any.
* config/gcn/gcn-valu.md (gather<mode>_insn_1offset<exec>):
Add xnack compatible alternatives.
(gather<mode>_insn_2offsets<exec>): Likewise.
* config/gcn/gcn.cc (gcn_option_override): Permit -mxnack for devices
other than Fiji and gfx1030.
(gcn_expand_epilogue): Remove early-clobber problems.
(gcn_hsa_declare_function_name): Obey -mxnack setting.
* config/gcn/gcn.md (xnack): New attribute.
(enabled): Rework to include "xnack" attribute.
(*movbi): Add xnack compatible alternatives.
(*mov<mode>_insn): Likewise.
(*mov<mode>_insn): Likewise.
(*mov<mode>_insn): Likewise.
(*movti_insn): Likewise.
* config/gcn/gcn.opt (-mxnack): Change the default to "any".
* doc/invoke.texi: Remove placeholder notice for -mxnack.

gcc/config/gcn/gcn-hsa.h
gcc/config/gcn/gcn-valu.md
gcc/config/gcn/gcn.cc
gcc/config/gcn/gcn.md
gcc/config/gcn/gcn.opt
gcc/doc/invoke.texi

index 4d72299da9b77a34900b75f373d4cf1c46ab5044..bfb104526c52636ce2b20b4e9bde3c52f1a3f64d 100644 (file)
@@ -75,16 +75,13 @@ extern unsigned int gcn_local_sym_hash (const char *name);
    supported for gcn.  */
 #define GOMP_SELF_SPECS ""
 
-#define NO_XNACK "!march=*:;march=fiji:;march=gfx1030:;"
+#define NO_XNACK "march=fiji:;march=gfx1030:;"
 #define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;"
 
 /* In HSACOv4 no attribute setting means the binary supports "any" hardware
    configuration.  The name of the attribute also changed.  */
 #define SRAMOPT "msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc"
-
-/* Replace once XNACK is supported:
-   #define XNACKOPT "mxnack=on:-mattr=+xnack;mxnack=off:-mattr=-xnack"  */
-#define XNACKOPT "!mnack=*:-mattr=-xnack;mnack=*:-mattr=-xnack"
+#define XNACKOPT "mxnack=on:-mattr=+xnack;mxnack=off:-mattr=-xnack"
 
 /* Use LLVM assembler and linker options.  */
 #define ASM_SPEC  "-triple=amdgcn--amdhsa "  \
index a928decd408c00ed7ff1be1abd937769bd2351dc..64b8ea1057fc29740b53d247fa302ac2e7709794 100644 (file)
     {})
 
 (define_insn "gather<mode>_insn_1offset<exec>"
-  [(set (match_operand:V_MOV 0 "register_operand"                 "=v,a")
+  [(set (match_operand:V_MOV 0 "register_operand"                 "=v,a,&v,&a")
        (unspec:V_MOV
-         [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v,v")
+         [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v,v, v, v")
                        (vec_duplicate:<VnDI>
-                         (match_operand 2 "immediate_operand"     " n,n")))
-          (match_operand 3 "immediate_operand"                    " n,n")
-         (match_operand 4 "immediate_operand"                     " n,n")
+                         (match_operand 2 "immediate_operand"     " n,n, n, n")))
+          (match_operand 3 "immediate_operand"                    " n,n, n, n")
+          (match_operand 4 "immediate_operand"                    " n,n, n, n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
   "(AS_FLAT_P (INTVAL (operands[3]))
   }
   [(set_attr "type" "flat")
    (set_attr "length" "12")
-   (set_attr "gcn_version" "*,cdna2")])
+   (set_attr "gcn_version" "*,cdna2,*,cdna2")
+   (set_attr "xnack" "off,off,on,on")])
 
 (define_insn "gather<mode>_insn_1offset_ds<exec>"
   [(set (match_operand:V_MOV 0 "register_operand"                 "=v,a")
    (set_attr "gcn_version" "*,cdna2")])
 
 (define_insn "gather<mode>_insn_2offsets<exec>"
-  [(set (match_operand:V_MOV 0 "register_operand"                    "=v,a")
+  [(set (match_operand:V_MOV 0 "register_operand"              "=v,a,&v,&a")
        (unspec:V_MOV
          [(plus:<VnDI>
             (plus:<VnDI>
               (vec_duplicate:<VnDI>
-                (match_operand:DI 1 "register_operand"               "Sv,Sv"))
+                (match_operand:DI 1 "register_operand"         "Sv,Sv,Sv,Sv"))
               (sign_extend:<VnDI>
-                (match_operand:<VnSI> 2 "register_operand"           " v,v")))
+                (match_operand:<VnSI> 2 "register_operand"     " v, v, v, v")))
             (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand"
-                                                                     " n,n")))
-          (match_operand 4 "immediate_operand"                       " n,n")
-          (match_operand 5 "immediate_operand"                       " n,n")
+                                                               " n, n, n, n")))
+          (match_operand 4 "immediate_operand"                 " n, n, n, n")
+          (match_operand 5 "immediate_operand"                 " n, n, n, n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
   "(AS_GLOBAL_P (INTVAL (operands[4]))
   }
   [(set_attr "type" "flat")
    (set_attr "length" "12")
-   (set_attr "gcn_version" "*,cdna2")])
+   (set_attr "gcn_version" "*,cdna2,*,cdna2")
+   (set_attr "xnack" "off,off,on,on")])
 
 (define_expand "scatter_store<mode><vnsi>"
   [(match_operand:DI 0 "register_operand")
index 031b405e810aa6b80f486cda20afec7c36f34b13..d92cd01d03f02f43b8436f6fa4562ae6d09b1652 100644 (file)
@@ -160,11 +160,18 @@ gcn_option_override (void)
        acc_lds_size = 32768;
     }
 
-  /* The xnack option is a placeholder, for now.  Before removing, update
-     gcn-hsa.h's XNACKOPT, gcn.opt's mxnack= default init+descr, and
-     invoke.texi's default description.  */
-  if (flag_xnack != HSACO_ATTR_OFF)
-    sorry ("XNACK support");
+  /* gfx803 "Fiji" and gfx1030 do not support XNACK.  */
+  if (gcn_arch == PROCESSOR_FIJI
+      || gcn_arch == PROCESSOR_GFX1030)
+    {
+      if (flag_xnack == HSACO_ATTR_ON)
+       error ("-mxnack=on is incompatible with -march=%s",
+              (gcn_arch == PROCESSOR_FIJI ? "fiji"
+               : gcn_arch == PROCESSOR_GFX1030 ? "gfx1030"
+               : NULL));
+      /* Allow HSACO_ATTR_ANY silently because that's the default.  */
+      flag_xnack = HSACO_ATTR_OFF;
+    }
 }
 
 /* }}}  */
@@ -3585,18 +3592,20 @@ gcn_expand_epilogue (void)
       /* Assume that an exit value compatible with gcn-run is expected.
          That is, the third input parameter is an int*.
 
-         We can't allocate any new registers, but the kernarg_reg is
-         dead after this, so we'll use that.  */
+         We can't allocate any new registers, but the dispatch_ptr and
+        kernarg_reg are dead after this, so we'll use those.  */
+      rtx dispatch_ptr_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
+                                         [DISPATCH_PTR_ARG]);
       rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
                                     [KERNARG_SEGMENT_PTR_ARG]);
       rtx retptr_mem = gen_rtx_MEM (DImode,
                                    gen_rtx_PLUS (DImode, kernarg_reg,
                                                  GEN_INT (16)));
       set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
-      emit_move_insn (kernarg_reg, retptr_mem);
+      emit_move_insn (dispatch_ptr_reg, retptr_mem);
 
       rtx retval_addr = gen_rtx_REG (DImode, FIRST_VPARM_REG + 2);
-      emit_move_insn (retval_addr, kernarg_reg);
+      emit_move_insn (retval_addr, dispatch_ptr_reg);
       rtx retval_mem = gen_rtx_MEM (SImode, retval_addr);
       set_mem_addr_space (retval_mem, ADDR_SPACE_FLAT);
       emit_move_insn (retval_mem, gen_rtx_REG (SImode, RETURN_VALUE_REG));
index b7fbbaf830b046c3bda613fd5c82239c8db4b3f6..c7f63d0a3ac8a20e7fd078acee991999c7914097 100644 (file)
 (define_attr "gcn_version" "gcn3,gcn5,cdna2" (const_string "gcn3"))
 (define_attr "rdna" "any,no,yes" (const_string "any"))
 
+(define_attr "xnack" "na,off,on" (const_string "na"))
+
 (define_attr "enabled" ""
   (cond [(and (eq_attr "rdna" "no")
              (ne (symbol_ref "TARGET_RDNA2") (const_int 0)))
         (and (eq_attr "rdna" "yes")
              (eq (symbol_ref "TARGET_RDNA2") (const_int 0)))
           (const_int 0)
-        (eq_attr "gcn_version" "gcn3") (const_int 1)
         (and (eq_attr "gcn_version" "gcn5")
-             (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
-          (const_int 1)
+             (eq (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
+          (const_int 0)
         (and (eq_attr "gcn_version" "cdna2")
-             (ne (symbol_ref "TARGET_CDNA2_PLUS") (const_int 0)))
-          (const_int 1)]
-       (const_int 0)))
+             (eq (symbol_ref "TARGET_CDNA2_PLUS") (const_int 0)))
+          (const_int 0)
+        (and (eq_attr "xnack" "off")
+             (ne (symbol_ref "TARGET_XNACK") (const_int 0)))
+          (const_int 0)
+        (and (eq_attr "xnack" "on")
+             (eq (symbol_ref "TARGET_XNACK") (const_int 0)))
+          (const_int 0)]
+       (const_int 1)))
 
 ; We need to be able to identify v_readlane and v_writelane with
 ; SGPR lane selection in order to handle "Manually Inserted Wait States".
 
 (define_insn "*movbi"
   [(set (match_operand:BI 0 "nonimmediate_operand"
-                                   "=Sg,   v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM")
+                         "=Sg,   v,Sg,cs,cV,cV,Sm,&Sm,RS, v,&v,RF, v,&v,RM")
        (match_operand:BI 1 "gcn_load_operand"
-                                   "SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))]
+                         "SSA,vSvA, v,SS, v,SS,RS, RS,Sm,RF,RF, v,RM,RM, v"))]
   ""
   {
     /* SCC as an operand is currently not accepted by the LLVM assembler, so
       return "s_mov_b32\tvcc_lo, %1\;"
             "s_mov_b32\tvcc_hi, 0";
     case 6:
-      return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
     case 7:
-      return "s_store_dword\t%1, %A0";
+      return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
     case 8:
-      return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
+      return "s_store_dword\t%1, %A0";
     case 9:
-      return "flat_store_dword\t%A0, %1%O0%g0";
     case 10:
-      return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
+      return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
     case 11:
+      return "flat_store_dword\t%A0, %1%O0%g0";
+    case 12:
+    case 13:
+      return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
+    case 14:
       return "global_store_dword\t%A0, %1%O0%g0";
     default:
       gcc_unreachable ();
     }
   }
-  [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat,
-                    flat,flat")
-   (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*")
-   (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")])
+  [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
+                    flat,flat,flat,flat")
+   (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
+   (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")])
 
 ; 32bit move pattern
 
   [(set (match_operand:SISF 0 "nonimmediate_operand")
        (match_operand:SISF 1 "gcn_load_operand"))]
   ""
-  {@ [cons: =0, 1; attrs: type, exec, length, gcn_version]
-   [SD  ,SSA ;sop1 ,*   ,4 ,*    ] s_mov_b32\t%0, %1
-   [SD  ,J   ;sopk ,*   ,4 ,*    ] s_movk_i32\t%0, %1
-   [SD  ,B   ;sop1 ,*   ,8 ,*    ] s_mov_b32\t%0, %1
-   [SD  ,RB  ;smem ,*   ,12,*    ] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
-   [RB  ,Sm  ;smem ,*   ,12,*    ] s_buffer_store%s1\t%1, s[0:3], %0
-   [Sm  ,RS  ;smem ,*   ,12,*    ] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
-   [RS  ,Sm  ;smem ,*   ,12,*    ] s_store_dword\t%1, %A0
-   [v   ,v   ;vop1 ,*   ,4 ,*    ] v_mov_b32\t%0, %1
-   [Sg  ,v   ;vop3a,none,8 ,*    ] v_readlane_b32\t%0, %1, 0
-   [v   ,Sv  ;vop3a,none,8 ,*    ] v_writelane_b32\t%0, %1, 0
-   [v   ,^a  ;vop3p_mai,*,8,*    ] v_accvgpr_read_b32\t%0, %1
-   [a   ,v   ;vop3p_mai,*,8,*    ] v_accvgpr_write_b32\t%0, %1
-   [a   ,a   ;vop1 ,*    ,4,cdna2] v_accvgpr_mov_b32\t%0, %1
-   [v   ,RF  ;flat ,*   ,12,*    ] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
-   [^a  ,RF  ;flat ,*   ,12,cdna2] ^
-   [RF  ,v   ;flat ,*   ,12,*    ] flat_store_dword\t%A0, %1%O0%g0
-   [RF  ,a   ;flat ,*   ,12,cdna2] ^
-   [v   ,B   ;vop1 ,*   ,8 ,*    ] v_mov_b32\t%0, %1
-   [RLRG,v   ;ds   ,*   ,12,*    ] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
-   [v   ,RLRG;ds   ,*   ,12,*    ] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
-   [SD  ,Y   ;sop1 ,*   ,8 ,*    ] s_mov_b32\t%0, %1
-   [v   ,RM  ;flat ,*   ,12,*    ] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
-   [^a  ,RM  ;flat ,*   ,12,cdna2] ^
-   [RM  ,v   ;flat ,*   ,12,*    ] global_store_dword\t%A0, %1%O0%g0
-   [RM  ,a   ;flat ,*   ,12,cdna2] ^
+  {@ [cons: =0, 1; attrs: type, exec, length, gcn_version, xnack]
+   [SD  ,SSA ;sop1 ,*   ,4 ,*    ,*  ] s_mov_b32\t%0, %1
+   [SD  ,J   ;sopk ,*   ,4 ,*    ,*  ] s_movk_i32\t%0, %1
+   [SD  ,B   ;sop1 ,*   ,8 ,*    ,*  ] s_mov_b32\t%0, %1
+   [SD  ,RB  ;smem ,*   ,12,*    ,off] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
+   [&SD ,RB  ;smem ,*   ,12,*    ,on ] ^
+   [RB  ,Sm  ;smem ,*   ,12,*    ,*  ] s_buffer_store%s1\t%1, s[0:3], %0
+   [Sm  ,RS  ;smem ,*   ,12,*    ,off] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+   [&Sm ,RS  ;smem ,*   ,12,*    ,on ] ^
+   [RS  ,Sm  ;smem ,*   ,12,*    ,*  ] s_store_dword\t%1, %A0
+   [v   ,v   ;vop1 ,*   ,4 ,*    ,*  ] v_mov_b32\t%0, %1
+   [Sg  ,v   ;vop3a,none,8 ,*    ,*  ] v_readlane_b32\t%0, %1, 0
+   [v   ,Sv  ;vop3a,none,8 ,*    ,*  ] v_writelane_b32\t%0, %1, 0
+   [v   ,^a  ;vop3p_mai,*,8,*    ,*  ] v_accvgpr_read_b32\t%0, %1
+   [a   ,v   ;vop3p_mai,*,8,*    ,*  ] v_accvgpr_write_b32\t%0, %1
+   [a   ,a   ;vop1 ,*    ,4,cdna2,*  ] v_accvgpr_mov_b32\t%0, %1
+   [v   ,RF  ;flat ,*   ,12,*    ,off] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
+   [&v  ,RF  ;flat ,*   ,12,*    ,on ] ^
+   [^a  ,RF  ;flat ,*   ,12,cdna2,off] ^
+   [&^a ,RF  ;flat ,*   ,12,cdna2,on ] ^
+   [RF  ,v   ;flat ,*   ,12,*    ,*  ] flat_store_dword\t%A0, %1%O0%g0
+   [RF  ,a   ;flat ,*   ,12,cdna2,*  ] ^
+   [v   ,B   ;vop1 ,*   ,8 ,*    ,*  ] v_mov_b32\t%0, %1
+   [RLRG,v   ;ds   ,*   ,12,*    ,*  ] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+   [v   ,RLRG;ds   ,*   ,12,*    ,*  ] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+   [SD  ,Y   ;sop1 ,*   ,8 ,*    ,*  ] s_mov_b32\t%0, %1
+   [v   ,RM  ;flat ,*   ,12,*    ,off] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+   [&v  ,RM  ;flat ,*   ,12,*    ,on ] ^
+   [^a  ,RM  ;flat ,*   ,12,cdna2,off] ^
+   [&^a ,RM  ;flat ,*   ,12,cdna2,on ] ^
+   [RM  ,v   ;flat ,*   ,12,*    ,*  ] global_store_dword\t%A0, %1%O0%g0
+   [RM  ,a   ;flat ,*   ,12,cdna2,*  ] ^
   })
 
 ; 8/16bit move pattern
   [(set (match_operand:QIHI 0 "nonimmediate_operand")
        (match_operand:QIHI 1 "gcn_load_operand"))]
   "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
-  {@ [cons: =0, 1; attrs: type, exec, length, gcn_version]
-  [SD  ,SSA ;sop1 ,*   ,4 ,*    ] s_mov_b32\t%0, %1
-  [SD  ,J   ;sopk ,*   ,4 ,*    ] s_movk_i32\t%0, %1
-  [SD  ,B   ;sop1 ,*   ,8 ,*    ] s_mov_b32\t%0, %1
-  [v   ,v   ;vop1 ,*   ,4 ,*    ] v_mov_b32\t%0, %1
-  [Sg  ,v   ;vop3a,none,4 ,*    ] v_readlane_b32\t%0, %1, 0
-  [v   ,Sv  ;vop3a,none,4 ,*    ] v_writelane_b32\t%0, %1, 0
-  [v   ,^a  ;vop3p_mai,*,8,*    ] v_accvgpr_read_b32\t%0, %1
-  [a   ,v   ;vop3p_mai,*,8,*    ] v_accvgpr_write_b32\t%0, %1
-  [a   ,a   ;vop1 ,*    ,8,cdna2] v_accvgpr_mov_b32\t%0, %1
-  [v    ,RF ;flat ,*   ,12,*    ] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
-  [^a   ,RF ;flat ,*   ,12,cdna2] ^
-  [RF  ,v   ;flat ,*   ,12,*    ] flat_store%s0\t%A0, %1%O0%g0
-  [RF  ,a   ;flat ,*   ,12,cdna2] ^
-  [v   ,B   ;vop1 ,*   ,8 ,*    ] v_mov_b32\t%0, %1
-  [RLRG,v   ;ds   ,*   ,12,*    ] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
-  [v   ,RLRG;ds   ,*   ,12,*    ] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
-  [v   ,RM  ;flat ,*   ,12,*    ] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  [^a  ,RM  ;flat ,*   ,12,cdna2] ^
-  [RM  ,v   ;flat ,*   ,12,*    ] global_store%s0\t%A0, %1%O0%g0
-  [RM  ,a   ;flat ,*   ,12,cdna2] ^
+  {@ [cons: =0, 1; attrs: type, exec, length, gcn_version, xnack]
+  [SD  ,SSA ;sop1 ,*   ,4 ,*    ,*  ] s_mov_b32\t%0, %1
+  [SD  ,J   ;sopk ,*   ,4 ,*    ,*  ] s_movk_i32\t%0, %1
+  [SD  ,B   ;sop1 ,*   ,8 ,*    ,*  ] s_mov_b32\t%0, %1
+  [v   ,v   ;vop1 ,*   ,4 ,*    ,*  ] v_mov_b32\t%0, %1
+  [Sg  ,v   ;vop3a,none,4 ,*    ,*  ] v_readlane_b32\t%0, %1, 0
+  [v   ,Sv  ;vop3a,none,4 ,*    ,*  ] v_writelane_b32\t%0, %1, 0
+  [v   ,^a  ;vop3p_mai,*,8,*    ,*  ] v_accvgpr_read_b32\t%0, %1
+  [a   ,v   ;vop3p_mai,*,8,*    ,*  ] v_accvgpr_write_b32\t%0, %1
+  [a   ,a   ;vop1 ,*    ,8,cdna2,*  ] v_accvgpr_mov_b32\t%0, %1
+  [v   ,RF  ;flat ,*   ,12,*    ,off] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
+  [&v  ,RF  ;flat ,*   ,12,*    ,on ] ^
+  [^a  ,RF  ;flat ,*   ,12,cdna2,off] ^
+  [&^a ,RF  ;flat ,*   ,12,cdna2,on ] ^
+  [RF  ,v   ;flat ,*   ,12,*    ,*  ] flat_store%s0\t%A0, %1%O0%g0
+  [RF  ,a   ;flat ,*   ,12,cdna2,*  ] ^
+  [v   ,B   ;vop1 ,*   ,8 ,*    ,*  ] v_mov_b32\t%0, %1
+  [RLRG,v   ;ds   ,*   ,12,*    ,*  ] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+  [v   ,RLRG;ds   ,*   ,12,*    ,*  ] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+  [v   ,RM  ;flat ,*   ,12,*    ,off] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+  [&v  ,RM  ;flat ,*   ,12,*    ,on ] ^
+  [^a  ,RM  ;flat ,*   ,12,cdna2,off] ^
+  [&^a ,RM  ;flat ,*   ,12,cdna2,on ] ^
+  [RM  ,v   ;flat ,*   ,12,*    ,*  ] global_store%s0\t%A0, %1%O0%g0
+  [RM  ,a   ;flat ,*   ,12,cdna2,*  ] ^
   })
 
 ; 64bit move pattern
   [(set (match_operand:DIDF 0 "nonimmediate_operand")
        (match_operand:DIDF 1 "general_operand"))]
   "GET_CODE(operands[1]) != SYMBOL_REF"
-  {@ [cons: =0, 1; attrs: type, length, gcn_version]
-  [SD  ,SSA ;sop1 ,4 ,*    ] s_mov_b64\t%0, %1
-  [SD  ,C   ;sop1 ,8 ,*    ] ^
-  [SD  ,DB  ;mult ,* ,*    ] #
-  [RS  ,Sm  ;smem ,12,*    ] s_store_dwordx2\t%1, %A0
-  [Sm  ,RS  ;smem ,12,*    ] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
-  [v   ,v   ;vmult,* ,*    ] #
-  [v   ,DB  ;vmult,* ,*    ] #
-  [Sg  ,v   ;vmult,* ,*    ] #
-  [v   ,Sv  ;vmult,* ,*    ] #
-  [v   ,^a  ;vmult,* ,*    ] #
-  [a   ,v   ;vmult,* ,*    ] #
-  [a   ,a   ;vmult,* ,cdna2] #
-  [v   ,RF  ;flat ,12,*    ] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
-  [^a  ,RF  ;flat ,12,cdna2] ^
-  [RF  ,v   ;flat ,12,*    ] flat_store_dwordx2\t%A0, %1%O0%g0
-  [RF  ,a   ;flat ,12,cdna2] ^
-  [RLRG,v   ;ds   ,12,*    ] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
-  [v   ,RLRG;ds   ,12,*    ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
-  [v   ,RM  ;flat ,12,*    ] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  [^a  ,RM  ;flat ,12,cdna2] ^
-  [RM  ,v   ;flat ,12,*    ] global_store_dwordx2\t%A0, %1%O0%g0
-  [RM  ,a   ;flat ,12,cdna2] ^
+  {@ [cons: =0, 1; attrs: type, length, gcn_version, xnack]
+  [SD  ,SSA ;sop1 ,4 ,*    ,*  ] s_mov_b64\t%0, %1
+  [SD  ,C   ;sop1 ,8 ,*    ,*  ] ^
+  [SD  ,DB  ;mult ,* ,*    ,*  ] #
+  [RS  ,Sm  ;smem ,12,*    ,*  ] s_store_dwordx2\t%1, %A0
+  [Sm  ,RS  ;smem ,12,*    ,off] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+  [&Sm ,RS  ;smem ,12,*    ,on ] ^
+  [v   ,v   ;vmult,* ,*    ,*  ] #
+  [v   ,DB  ;vmult,* ,*    ,*  ] #
+  [Sg  ,v   ;vmult,* ,*    ,*  ] #
+  [v   ,Sv  ;vmult,* ,*    ,*  ] #
+  [v   ,^a  ;vmult,* ,*    ,*  ] #
+  [a   ,v   ;vmult,* ,*    ,*  ] #
+  [a   ,a   ;vmult,* ,cdna2,*  ] #
+  [v   ,RF  ;flat ,12,*    ,off] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
+  [&v  ,RF  ;flat ,12,*    ,on ] ^
+  [^a  ,RF  ;flat ,12,cdna2,off] ^
+  [&^a ,RF  ;flat ,12,cdna2,on ] ^
+  [RF  ,v   ;flat ,12,*    ,*  ] flat_store_dwordx2\t%A0, %1%O0%g0
+  [RF  ,a   ;flat ,12,cdna2,*  ] ^
+  [RLRG,v   ;ds   ,12,*    ,*  ] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+  [v   ,RLRG;ds   ,12,*    ,*  ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+  [v   ,RM  ;flat ,12,*    ,off] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+  [&v  ,RM  ;flat ,12,*    ,on ] ^
+  [^a  ,RM  ;flat ,12,cdna2,off] ^
+  [&^a ,RM  ;flat ,12,cdna2,on ] ^
+  [RM  ,v   ;flat ,12,*    ,*  ] global_store_dwordx2\t%A0, %1%O0%g0
+  [RM  ,a   ;flat ,12,cdna2,*  ] ^
   }
   "reload_completed
    && ((!MEM_P (operands[0]) && !MEM_P (operands[1])
   [(set (match_operand:TI 0 "nonimmediate_operand")
        (match_operand:TI 1 "general_operand"  ))]
   ""
-  {@ [cons: =0, 1; attrs: type, delayeduse, length, gcn_version]
-  [SD,SSB;mult ,*  ,* ,*    ] #
-  [RS,Sm ;smem ,*  ,12,*    ] s_store_dwordx4\t%1, %A0
-  [Sm,RS ;smem ,yes,12,*    ] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
-  [RF,v  ;flat ,*  ,12,*    ] flat_store_dwordx4\t%A0, %1%O0%g0
-  [RF,a  ;flat ,*  ,12,cdna2] ^
-  [v ,RF ;flat ,*  ,12,*    ] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
-  [^a,RF ;flat ,*  ,12,cdna2] ^
-  [v ,v  ;vmult,*  ,* ,*    ] #
-  [v ,Sv ;vmult,*  ,* ,*    ] #
-  [SD,v  ;vmult,*  ,* ,*    ] #
-  [RM,v  ;flat ,yes,12,*    ] global_store_dwordx4\t%A0, %1%O0%g0
-  [RM,a  ;flat ,yes,12,cdna2] ^
-  [v ,RM ;flat ,*  ,12,*    ] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  [^a,RM ;flat ,*  ,12,cdna2] ^
-  [RL,v  ;ds   ,*  ,12,*    ] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
-  [v ,RL ;ds   ,*  ,12,*    ] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
-  [v ,^a ;vmult,*  ,* ,*    ] #
-  [a ,v  ;vmult,*  ,* ,*    ] #
-  [a ,a  ;vmult,*  ,* ,cdna2] #
+  {@ [cons: =0, 1; attrs: type, delayeduse, length, gcn_version, xnack]
+  [SD ,SSB;mult ,*  ,* ,*    ,*  ] #
+  [RS ,Sm ;smem ,*  ,12,*    ,*  ] s_store_dwordx4\t%1, %A0
+  [Sm ,RS ;smem ,yes,12,*    ,off] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+  [&Sm,RS ;smem ,yes,12,*    ,on ] ^
+  [RF ,v  ;flat ,*  ,12,*    ,*  ] flat_store_dwordx4\t%A0, %1%O0%g0
+  [RF ,a  ;flat ,*  ,12,cdna2,*  ] ^
+  [v  ,RF ;flat ,*  ,12,*    ,off] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
+  [&v ,RF ;flat ,*  ,12,*    ,on ] ^
+  [^a ,RF ;flat ,*  ,12,cdna2,off] ^
+  [&^a,RF ;flat ,*  ,12,cdna2,on ] ^
+  [v  ,v  ;vmult,*  ,* ,*    ,*  ] #
+  [v  ,Sv ;vmult,*  ,* ,*    ,*  ] #
+  [SD ,v  ;vmult,*  ,* ,*    ,*  ] #
+  [RM ,v  ;flat ,yes,12,*    ,*  ] global_store_dwordx4\t%A0, %1%O0%g0
+  [RM ,a  ;flat ,yes,12,cdna2,*  ] ^
+  [v  ,RM ;flat ,*  ,12,*    ,off] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+  [&v ,RM ;flat ,*  ,12,*    ,on ] ^
+  [^a ,RM ;flat ,*  ,12,cdna2,off] ^
+  [&^a,RM ;flat ,*  ,12,cdna2,on ] ^
+  [RL ,v  ;ds   ,*  ,12,*    ,*  ] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+  [v  ,RL ;ds   ,*  ,12,*    ,*  ] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+  [v  ,^a ;vmult,*  ,* ,*    ,*  ] #
+  [a  ,v  ;vmult,*  ,* ,*    ,*  ] #
+  [a  ,a  ;vmult,*  ,* ,cdna2,*  ] #
   }
   "reload_completed
    && REG_P (operands[0])
   (clobber (reg:BI SCC_REG))]
  "GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
   {
+    /* This s_load may not be XNACK-safe on devices where the GOT may fault.
+       DGPUs are most likely fine.  */
     if (SYMBOL_REF_P (operands[1])
        && SYMBOL_REF_WEAK (operands[1]))
        return "s_getpc_b64\t%0\;"
   {
     /* !!! These sequences clobber CC_SAVE_REG.  */
 
+    /* This s_load may not be XNACK-safe on devices where the GOT may fault.
+       DGPUs are most likely fine.  */
     if (SYMBOL_REF_P (operands[1])
        && SYMBOL_REF_WEAK (operands[1]))
        return "s_mov_b32\ts22, scc\;"
index e5db6df92d772a501c33fbc717df11a441dda064..c356a0cbb0894a51f4e33ffe9a8f111d5693f6a6 100644 (file)
@@ -98,8 +98,8 @@ EnumValue
 Enum(hsaco_attr_type) String(any) Value(HSACO_ATTR_ANY)
 
 mxnack=
-Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_OFF)
-Compile for devices requiring XNACK enabled. Default \"off\".
+Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_ANY)
+Compile for devices requiring XNACK enabled. Default \"any\".
 
 msram-ecc=
 Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_ANY)
index 19feba467a460b48de86b6de1d981851adae9351..db039c47220963496c32427a941dd32b4e9741d1 100644 (file)
@@ -21628,8 +21628,7 @@ run-time performance.  The default is 32KB when using OpenACC or OpenMP, and
 Compile binaries suitable for devices with the XNACK feature enabled, disabled,
 or either mode.  Some devices always require XNACK and some allow the user to
 configure XNACK.  The compiled code must match the device mode.
-@c The default is @samp{-mxnack=any}.
-At present this option is a placeholder for support that is not yet implemented.
+The default is @samp{-mxnack=any}.
 
 @end table