u64 usm_batch_base_ofs;
/** @cleared_mem_ofs: VM offset of @cleared_bo. */
u64 cleared_mem_ofs;
+ /** @large_page_copy_ofs: VM offset of the 2M pages used for large copies. */
+ u64 large_page_copy_ofs;
+ /**
+ * @large_page_copy_pdes: BO offset at which the 2M page PDEs used for
+ * large copies are written out.
+ */
+ u64 large_page_copy_pdes;
/**
* @fence: dma-fence representing the last migration job batch.
* Protected by @job_mutex.
(i + 1) * 8, u64, entry);
}
+ /* Reserve level-1 PDEs (2M pages) for the large-copy window */
+ level = 1;
+ m->large_page_copy_ofs = NUM_PT_SLOTS << xe_pt_shift(level);
+ m->large_page_copy_pdes = map_ofs + XE_PAGE_SIZE * level +
+ NUM_PT_SLOTS * 8;
+
/* Set up a 1GiB NULL mapping at 255GiB offset. */
level = 2;
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64,
static void build_pt_update_batch_sram(struct xe_migrate *m,
struct xe_bb *bb, u32 pt_offset,
struct drm_pagemap_addr *sram_addr,
- u32 size)
+ u32 size, int level)
{
u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
- u64 gpu_page_size = 0x1ull << xe_pt_shift(0);
+ u64 gpu_page_size = 0x1ull << xe_pt_shift(level);
u32 ptes;
int i = 0;
again:
pte = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
addr, pat_index,
- 0, false, 0);
+ level, false, 0);
bb->cs[bb->len++] = lower_32_bits(pte);
bb->cs[bb->len++] = upper_32_bits(pte);
}
}
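+/*
+ * Returns true if every 2M-aligned chunk of the region is backed by a
+ * single 2M entry, so the copy can be mapped with level-1 (2M) PDEs
+ * instead of 4K PTEs.
+ */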
+static bool xe_migrate_vram_use_pde(struct drm_pagemap_addr *sram_addr,
+ unsigned long size)
+{
+ u32 large_size = (0x1 << xe_pt_shift(1));
+ unsigned long i, incr = large_size / PAGE_SIZE;
+
+ for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE); i += incr)
+ if (PAGE_SIZE << sram_addr[i].order != large_size)
+ return false;
+
+ return true;
+}
+
enum xe_migrate_copy_dir {
XE_MIGRATE_COPY_TO_VRAM,
XE_MIGRATE_COPY_TO_SRAM,
PAGE_SIZE : 4;
int err;
unsigned long i, j;
+ bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset);
if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) ||
(sram_offset | vram_addr) & XE_CACHELINE_MASK))
* struct drm_pagemap_addr. Ensure this is the case even with higher
* orders.
*/
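+ /* No need to expand higher-order entries when mapping with 2M PDEs. */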
- for (i = 0; i < npages;) {
+ for (i = 0; !use_pde && i < npages;) {
unsigned int order = sram_addr[i].order;
for (j = 1; j < NR_PAGES(order) && i + j < npages; j++)
i += NR_PAGES(order);
}
- build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
- sram_addr, len + sram_offset);
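+ /* Map the SRAM pages with 2M PDEs when possible, otherwise with 4K PTEs. */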
+ if (use_pde)
+ build_pt_update_batch_sram(m, bb, m->large_page_copy_pdes,
+ sram_addr, len + sram_offset, 1);
+ else
+ build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
+ sram_addr, len + sram_offset, 0);
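+ /* With 2M PDEs the SRAM side is addressed via the reserved large-copy VM offset. */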
if (dir == XE_MIGRATE_COPY_TO_VRAM) {
- src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
+ if (use_pde)
+ src_L0_ofs = m->large_page_copy_ofs + sram_offset;
+ else
+ src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
} else {
src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
- dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
+ if (use_pde)
+ dst_L0_ofs = m->large_page_copy_ofs + sram_offset;
+ else
+ dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
}
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;