From 96e97a562d067a6d867862db79864cc66aae99c2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Tue, 2 Dec 2025 16:12:41 +0100 Subject: [PATCH] drm/amdgpu: Drop MMIO_REMAP domain bit and keep it Internal MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit "AMDGPU_GEM_DOMAIN_MMIO_REMAP" - Never activated as UAPI and it turned out that this was to inflexible. Allocate the MMIO_REMAP buffer object as a regular GEM BO and explicitly move it into the fixed AMDGPU_PL_MMIO_REMAP placement at the TTM level. This avoids relying on GEM domain bits for MMIO_REMAP, keeps the placement purely internal, and makes the lifetime and pinning of the global MMIO_REMAP BO explicit. The BO is pinned in TTM so it cannot be migrated or evicted. The corresponding free path relies on normal DRM teardown ordering, where no further user ioctls can access the global BO once TTM teardown begins. v2 (Srini): - Updated patch title. - Drop use of AMDGPU_GEM_DOMAIN_MMIO_REMAP in amdgpu_ttm.c. The MMIO_REMAP domain bit is removed from UAPI, so keep the MMIO_REMAP BO allocation domain-less (bp.domain = 0) and rely on the TTM placement (AMDGPU_PL_MMIO_REMAP) for backing/pinning. - Keep fdinfo/mem-stats visibility for MMIO_REMAP by classifying BOs based on bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP, since the domain bit is removed. v3: Squash patches #1 & #3 Fixes: 056132483724 ("drm/amdgpu/uapi: Introduce AMDGPU_GEM_DOMAIN_MMIO_REMAP") Fixes: 2a7a794eb82c ("drm/amdgpu/ttm: Allocate/Free 4K MMIO_REMAP Singleton") Cc: Alex Deucher Cc: Christian König Cc: Leo Liu Cc: Ruijing Dong Cc: David (Ming Qiang) Wu Signed-off-by: Srinivasan Shanmugam Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 - drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 77 ++++++++++++++-------- include/uapi/drm/amdgpu_drm.h | 6 +- 5 files changed, 60 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 032971d0a3cc6..ab899709c2609 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -417,9 +417,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, /* always clear VRAM */ flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; - if (args->in.domains & AMDGPU_GEM_DOMAIN_MMIO_REMAP) - return -EINVAL; - /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b676310ce9acd..1fb9564006966 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -153,14 +153,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } - if (domain & AMDGPU_GEM_DOMAIN_MMIO_REMAP) { - places[c].fpfn = 0; - places[c].lpfn = 0; - places[c].mem_type = AMDGPU_PL_MMIO_REMAP; - places[c].flags = 0; - c++; - } - if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; @@ -1546,8 +1538,17 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) */ uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo) { - uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK; + u32 domain; + /* + * MMIO_REMAP is internal now, so it no longer maps from a userspace + * domain bit. Keep fdinfo/mem-stats visibility by checking the actual + * TTM placement. + */ + if (bo->tbo.resource && bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP) + return AMDGPU_PL_MMIO_REMAP; + + domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK; if (!domain) return TTM_PL_SYSTEM; @@ -1566,8 +1567,6 @@ uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo) return AMDGPU_PL_OA; case AMDGPU_GEM_DOMAIN_DOORBELL: return AMDGPU_PL_DOORBELL; - case AMDGPU_GEM_DOMAIN_MMIO_REMAP: - return AMDGPU_PL_MMIO_REMAP; default: return TTM_PL_SYSTEM; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 52c2d1731aabf..912c9afaf9e11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -168,8 +168,6 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type) return AMDGPU_GEM_DOMAIN_OA; case AMDGPU_PL_DOORBELL: return AMDGPU_GEM_DOMAIN_DOORBELL; - case AMDGPU_PL_MMIO_REMAP: - return AMDGPU_GEM_DOMAIN_MMIO_REMAP; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index cfbcce9c27c5b..15d561e3d87f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1909,42 +1909,45 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) } /** - * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO + * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton MMIO_REMAP BO * @adev: amdgpu device * - * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the + * Allocates a global BO with backing AMDGPU_PL_MMIO_REMAP when the * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular * GEM object (amdgpu_bo_create). * - * The BO is created as a normal GEM object via amdgpu_bo_create(), then - * reserved and pinned at the TTM level (ttm_bo_pin()) so it can never be - * migrated or evicted. No CPU mapping is established here. - * * Return: * * 0 on success or intentional skip (feature not present/unsupported) * * negative errno on allocation failure */ -static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev) +static int amdgpu_ttm_alloc_mmio_remap_bo(struct amdgpu_device *adev) { + struct ttm_operation_ctx ctx = { false, false }; + struct ttm_placement placement; + struct ttm_buffer_object *tbo; + struct ttm_place placements; struct amdgpu_bo_param bp; + struct ttm_resource *tmp; int r; /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */ if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE) return 0; + /* + * Allocate a BO first and then move it to AMDGPU_PL_MMIO_REMAP. + * The initial TTM resource assigned by amdgpu_bo_create() is + * replaced below with a fixed MMIO_REMAP placement. + */ memset(&bp, 0, sizeof(bp)); - - /* Create exactly one GEM BO in the MMIO_REMAP domain. */ - bp.type = ttm_bo_type_device; /* userspace-mappable GEM */ - bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */ + bp.type = ttm_bo_type_device; + bp.size = AMDGPU_GPU_PAGE_SIZE; bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP; + bp.domain = 0; bp.flags = 0; bp.resv = NULL; bp.bo_ptr_size = sizeof(struct amdgpu_bo); - r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo); if (r) return r; @@ -1953,42 +1956,60 @@ static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev) if (r) goto err_unref; + tbo = &adev->rmmio_remap.bo->tbo; + /* * MMIO_REMAP is a fixed I/O placement (AMDGPU_PL_MMIO_REMAP). - * Use TTM-level pin so the BO cannot be evicted/migrated, - * independent of GEM domains. This - * enforces the “fixed I/O window” */ - ttm_bo_pin(&adev->rmmio_remap.bo->tbo); + placement.num_placement = 1; + placement.placement = &placements; + placements.fpfn = 0; + placements.lpfn = 0; + placements.mem_type = AMDGPU_PL_MMIO_REMAP; + placements.flags = 0; + /* Force the BO into the fixed MMIO_REMAP placement */ + r = ttm_bo_mem_space(tbo, &placement, &tmp, &ctx); + if (unlikely(r)) + goto err_unlock; + + ttm_resource_free(tbo, &tbo->resource); + ttm_bo_assign_mem(tbo, tmp); + ttm_bo_pin(tbo); amdgpu_bo_unreserve(adev->rmmio_remap.bo); return 0; +err_unlock: + amdgpu_bo_unreserve(adev->rmmio_remap.bo); + err_unref: - if (adev->rmmio_remap.bo) - amdgpu_bo_unref(&adev->rmmio_remap.bo); + amdgpu_bo_unref(&adev->rmmio_remap.bo); adev->rmmio_remap.bo = NULL; return r; } /** - * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO + * amdgpu_ttm_free_mmio_remap_bo - Free the singleton MMIO_REMAP BO * @adev: amdgpu device * * Frees the kernel-owned MMIO_REMAP BO if it was allocated by * amdgpu_ttm_mmio_remap_bo_init(). */ -static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev) +static void amdgpu_ttm_free_mmio_remap_bo(struct amdgpu_device *adev) { - struct amdgpu_bo *bo = adev->rmmio_remap.bo; - - if (!bo) - return; /* <-- safest early exit */ + if (!adev->rmmio_remap.bo) + return; if (!amdgpu_bo_reserve(adev->rmmio_remap.bo, true)) { ttm_bo_unpin(&adev->rmmio_remap.bo->tbo); amdgpu_bo_unreserve(adev->rmmio_remap.bo); } + + /* + * At this point we rely on normal DRM teardown ordering: + * no new user ioctls can access the global MMIO_REMAP BO + * once TTM teardown begins. + */ amdgpu_bo_unref(&adev->rmmio_remap.bo); adev->rmmio_remap.bo = NULL; } @@ -2172,8 +2193,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } - /* Allocate the singleton MMIO_REMAP BO (4K) if supported */ - r = amdgpu_ttm_mmio_remap_bo_init(adev); + /* Allocate the singleton MMIO_REMAP BO if supported */ + r = amdgpu_ttm_alloc_mmio_remap_bo(adev); if (r) return r; @@ -2241,7 +2262,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); - amdgpu_ttm_mmio_remap_bo_fini(adev); + amdgpu_ttm_free_mmio_remap_bo(adev); amdgpu_ttm_fw_reserve_vram_fini(adev); amdgpu_ttm_drv_reserve_vram_fini(adev); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index f902add31fc64..1d34daa0ebcd5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -105,8 +105,6 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for * signalling user mode queues. - * - * %AMDGPU_GEM_DOMAIN_MMIO_REMAP MMIO remap page (special mapping for HDP flushing). */ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -115,15 +113,13 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 #define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 -#define AMDGPU_GEM_DOMAIN_MMIO_REMAP 0x80 #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ AMDGPU_GEM_DOMAIN_OA | \ - AMDGPU_GEM_DOMAIN_DOORBELL | \ - AMDGPU_GEM_DOMAIN_MMIO_REMAP) + AMDGPU_GEM_DOMAIN_DOORBELL) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) -- 2.47.3