* selective workarounds for hardware errata.
* The type of this attribute is uint32_t.
*/
- HSA_AMD_AGENT_INFO_ASIC_REVISION = 0xA012
+ HSA_AMD_AGENT_INFO_ASIC_REVISION = 0xA012,
+
+  /* Bitmask with memory properties of the agent.
+   * The type of this attribute is uint8_t[8].
+   */
+ HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES = 0xA114
} hsa_amd_agent_info_t;
+
+enum {
+ HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU = (1 << 0)
+};
+
typedef struct hsa_amd_hdp_flush_s {
uint32_t* HDP_MEM_FLUSH_CNTL;
uint32_t* HDP_REG_FLUSH_CNTL;
/* }}} */
/* {{{ Generic Plugin API */
+#if 0 /* TODO: Use to enable self-mapping/USM automatically. */
+/* FIXME: The auto-self-map feature depends on 'declare target' variables
+   still being mapped, even if all other mappings are ignored.  Cf. PR 115279. */
+
+/* Return TRUE if the GPU is an APU, i.e. the GPU is integrated with the CPU
+   and both use the same memory controller, so that mapping or memory
+   migration is pointless.  If CHECK_XNACK is TRUE, it is additionally
+   required that the GPU has *no* XNACK support; otherwise, FALSE is returned.
+
+   In theory, enabling unified-shared memory for APUs should always work;
+   however, some AMD APUs (e.g. MI300A) still require XNACK to be enabled,
+   as it is needed to handle page faults.
+
+   Thus, for unified-shared memory access, either of the following must hold
+   (cf. the sketch following this function):
+   * HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is TRUE.
+     This implies that all GPUs support USM access, either directly (as APU)
+     or via page migration.  For MI300A, this is only the case if
+     HSA_AMD_SYSTEM_INFO_XNACK_ENABLED is TRUE.
+   * The GPU is an APU *and* it does not support XNACK.  */
+
+static bool
+is_integrated_apu (struct agent_info *agent, bool check_xnack)
+{
+ enum {
+ HSACO_ATTR_UNSUPPORTED,
+ HSACO_ATTR_OFF,
+ HSACO_ATTR_ON,
+ HSACO_ATTR_ANY,
+ HSACO_ATTR_DEFAULT
+ };
+
+ bool is_apu;
+ uint8_t mem_prop[8];
+ hsa_status_t status;
+
+ status = hsa_fns.hsa_agent_get_info_fn (
+ agent->id, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES,
+ mem_prop);
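+  /* The enum value is used as a bit position within the returned 8-byte
+     bitmask; only the first byte is inspected below, so the assertion
+     ensures the bit position fits there.  */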
+ _Static_assert (HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU < 8,
+ "HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU < 8");
+ is_apu = (status == HSA_STATUS_SUCCESS
+ && (mem_prop[0] & (1 << HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU)));
+
+ if (check_xnack)
+    switch (agent->device_isa)
+ {
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, ...) \
+ case ELF: return is_apu && (XNACK == HSACO_ATTR_UNSUPPORTED);
+#include "../../gcc/config/gcn/gcn-devices.def"
+      default: return false;  /* Just to be safe.  */
+ }
+ return is_apu;
+}
+#endif
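+
+#if 0 /* Illustrative sketch only, not part of this change: it shows how the
+         two unified-shared-memory conditions described above could be
+         combined at a call site.  The helper name is made up, and it assumes
+         that hsa_fns provides hsa_system_get_info_fn.  */
+
+static bool
+usm_usable_without_mapping (struct agent_info *agent)
+{
+  bool svm_default = false;
+  hsa_status_t status = hsa_fns.hsa_system_get_info_fn (
+    (hsa_system_info_t) HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT,
+    &svm_default);
+
+  /* Either all agents can access system memory by default (for MI300A this
+     is only the case if XNACK is enabled), ...  */
+  if (status == HSA_STATUS_SUCCESS && svm_default)
+    return true;
+
+  /* ... or the GPU is an APU that works without XNACK.  */
+  return is_integrated_apu (agent, /* check_xnack */ true);
+}
+#endif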
+
/* Return the name of the accelerator, which is "gcn". */
const char *
return nvthd->ptx_dev->ctx;
}
+#if 0 /* TODO: Use to enable self-mapping/USM automatically. */
+/* FIXME: The auto-self-map feature depends on 'declare target' variables
+   still being mapped, even if all other mappings are ignored.  Cf. PR 115279. */
+
+/* Return TRUE if the GPU is integrated with the host, i.e. GPU and host
+   share the same memory controller.  As of Oct 2025, no such Nvidia GPU
+   seems to exist.  */
+static bool
+is_integrated_apu (struct ptx_device *ptx_dev)
+{
+ int pi;
+ CUresult r;
+ r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
+ CU_DEVICE_ATTRIBUTE_INTEGRATED, ptx_dev->dev);
+ return (r == CUDA_SUCCESS && pi == 1);
+}
+#endif
+
/* Plugin entry points. */
const char *