From db7dde99049f04b99cbf518be3a189b3e9f5c1d8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Piotr=20Pi=C3=B3rkowski?= Date: Fri, 3 Oct 2025 18:26:15 +0200 Subject: [PATCH] drm/xe: Add initial support for separate kernel VRAM region on the tile MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit So far, kernel and userspace allocations have shared the same VRAM region. However, in some scenarios, it may be necessary to reserve a separate VRAM area exclusively for kernel allocations. Let's add preliminary support for such a configuration. v2: - replaced for_each_bo_flag_vram with the improved for_each_set_bo_vram_flag helper (Matthew) - moved the VRAM flag iteration macro definition into xe_bo.c (Matthew) - drop unused bo_flgas from bo_vram_flags_to_vram_placement (Matthew) - use hweight32 helper in __xe_bo_fixed_placement for readability (Matthew) v3: remove unnecessary VRAM fixup id Signed-off-by: Piotr Piórkowski Cc: Matthew Auld Reviewed-by: Matthew Auld Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251003162619.1984236-2-piotr.piorkowski@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 78 ++++++++++++++++++---------- drivers/gpu/drm/xe/xe_bo.h | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 10 +++- drivers/gpu/drm/xe/xe_tile.c | 8 +++ drivers/gpu/drm/xe/xe_vram.c | 6 ++- 5 files changed, 75 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4410e28dee54b..ce20f58ee80e5 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -34,6 +34,7 @@ #include "xe_res_cursor.h" #include "xe_shrinker.h" #include "xe_sriov_vf_ccs.h" +#include "xe_tile.h" #include "xe_trace_bo.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" @@ -81,6 +82,10 @@ static struct ttm_placement tt_placement = { .placement = tt_placement_flags, }; +#define for_each_set_bo_vram_flag(bit__, bo_flags__) \ + for (unsigned int __bit_tmp = BIT(0); __bit_tmp <= XE_BO_FLAG_VRAM_MASK; __bit_tmp <<= 1) \ + for_each_if(((bit__) = __bit_tmp) & (bo_flags__) & XE_BO_FLAG_VRAM_MASK) + bool mem_type_is_vram(u32 mem_type) { return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN; @@ -213,6 +218,27 @@ static bool force_contiguous(u32 bo_flags) bo_flags & XE_BO_FLAG_PINNED; } +static u8 vram_bo_flag_to_tile_id(struct xe_device *xe, u32 vram_bo_flag) +{ + xe_assert(xe, vram_bo_flag & XE_BO_FLAG_VRAM_MASK); + xe_assert(xe, (vram_bo_flag & (vram_bo_flag - 1)) == 0); + + return __ffs(vram_bo_flag >> (__ffs(XE_BO_FLAG_VRAM0) - 1)) - 1; +} + +static u32 bo_vram_flags_to_vram_placement(struct xe_device *xe, u32 vram_flag, + enum ttm_bo_type type) +{ + u8 tile_id = vram_bo_flag_to_tile_id(xe, vram_flag); + + xe_assert(xe, tile_id < xe->info.tile_count); + + if (type == ttm_bo_type_kernel) + return xe->tiles[tile_id].mem.kernel_vram->placement; + else + return xe->tiles[tile_id].mem.vram->placement; +} + static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) { @@ -245,12 +271,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, } static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags, u32 *c) + u32 bo_flags, enum ttm_bo_type type, u32 *c) { - if (bo_flags & XE_BO_FLAG_VRAM0) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - if (bo_flags & XE_BO_FLAG_VRAM1) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); + u32 vram_flag; + + for_each_set_bo_vram_flag(vram_flag, bo_flags) { + u32 pl = bo_vram_flags_to_vram_placement(xe, vram_flag, type); + + add_vram(xe, bo, bo->placements, bo_flags, pl, c); + } } static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, @@ -269,11 +298,11 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, } static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags) + u32 bo_flags, enum ttm_bo_type type) { u32 c = 0; - try_add_vram(xe, bo, bo_flags, &c); + try_add_vram(xe, bo, bo_flags, type, &c); try_add_system(xe, bo, bo_flags, &c); try_add_stolen(xe, bo, bo_flags, &c); @@ -289,10 +318,10 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, } int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags) + u32 bo_flags, enum ttm_bo_type type) { xe_bo_assert_held(bo); - return __xe_bo_placement_for_flags(xe, bo, bo_flags); + return __xe_bo_placement_for_flags(xe, bo, bo_flags, type); } static void xe_evict_flags(struct ttm_buffer_object *tbo, @@ -2164,7 +2193,7 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, xe_validation_assert_exec(xe, exec, &bo->ttm.base); if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) { - err = __xe_bo_placement_for_flags(xe, bo, bo->flags); + err = __xe_bo_placement_for_flags(xe, bo, bo->flags, type); if (WARN_ON(err)) { xe_ttm_bo_destroy(&bo->ttm); return ERR_PTR(err); @@ -2222,34 +2251,31 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, } static int __xe_bo_fixed_placement(struct xe_device *xe, - struct xe_bo *bo, + struct xe_bo *bo, enum ttm_bo_type type, u32 flags, u64 start, u64 end, u64 size) { struct ttm_place *place = bo->placements; + u32 vram_flag, vram_stolen_flags; if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM)) return -EINVAL; + vram_flag = flags & XE_BO_FLAG_VRAM_MASK; + vram_stolen_flags = (flags & (XE_BO_FLAG_STOLEN)) | vram_flag; + + /* check if more than one VRAM/STOLEN flag is set */ + if (hweight32(vram_stolen_flags) > 1) + return -EINVAL; + place->flags = TTM_PL_FLAG_CONTIGUOUS; place->fpfn = start >> PAGE_SHIFT; place->lpfn = end >> PAGE_SHIFT; - switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) { - case XE_BO_FLAG_VRAM0: - place->mem_type = XE_PL_VRAM0; - break; - case XE_BO_FLAG_VRAM1: - place->mem_type = XE_PL_VRAM1; - break; - case XE_BO_FLAG_STOLEN: + if (flags & XE_BO_FLAG_STOLEN) place->mem_type = XE_PL_STOLEN; - break; - - default: - /* 0 or multiple of the above set */ - return -EINVAL; - } + else + place->mem_type = bo_vram_flags_to_vram_placement(xe, vram_flag, type); bo->placement = (struct ttm_placement) { .num_placement = 1, @@ -2278,7 +2304,7 @@ __xe_bo_create_locked(struct xe_device *xe, return bo; flags |= XE_BO_FLAG_FIXED_PLACEMENT; - err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size); + err = __xe_bo_fixed_placement(xe, bo, type, flags, start, end, size); if (err) { xe_bo_free(bo); return ERR_PTR(err); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index a77af42b5f9ea..9a8401300b15c 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -122,7 +122,7 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src); int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, - u32 bo_flags); + u32 bo_flags, enum ttm_bo_type type); static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 74d7af830b85d..9892444189501 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -158,7 +158,15 @@ struct xe_tile { /** @mem: memory management info for tile */ struct { /** - * @mem.vram: VRAM info for tile. + * @mem.kernel_vram: kernel-dedicated VRAM info for tile. + * + * Although VRAM is associated with a specific tile, it can + * still be accessed by all tiles' GTs. + */ + struct xe_vram_region *kernel_vram; + + /** + * @mem.vram: general purpose VRAM info for tile. * * Although VRAM is associated with a specific tile, it can * still be accessed by all tiles' GTs. diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index d49ba34019635..6edb5062c1da5 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -124,6 +124,14 @@ int xe_tile_alloc_vram(struct xe_tile *tile) return -ENOMEM; tile->mem.vram = vram; + /* + * If the kernel_vram is not already allocated, + * it means that tile has common VRAM region for + * kernel and user space. + */ + if (!tile->mem.kernel_vram) + tile->mem.kernel_vram = tile->mem.vram; + return 0; } diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index b44ebf50fedbb..7adfccf68e4c8 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -13,6 +13,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_assert.h" +#include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt_mcr.h" @@ -283,8 +284,11 @@ static void vram_fini(void *arg) xe->mem.vram->mapping = NULL; - for_each_tile(tile, xe, id) + for_each_tile(tile, xe, id) { tile->mem.vram->mapping = NULL; + if (tile->mem.kernel_vram) + tile->mem.kernel_vram->mapping = NULL; + } } struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement) -- 2.47.3