}
#define EMIT_COPY_DW 10
-static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
- u64 src_ofs, u64 dst_ofs, unsigned int size,
- unsigned int pitch)
+static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u64 dst_ofs, unsigned int size,
+ unsigned int pitch)
{
struct xe_device *xe = gt_to_xe(gt);
u32 mocs = 0;
bb->cs[bb->len++] = upper_32_bits(src_ofs);
}
+#define PAGE_COPY_MODE_PS SZ_256 /* hw uses 256 bytes as the page-size */
+static void emit_mem_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u64 dst_ofs, unsigned int size, unsigned int pitch)
+{
+ u32 mode, copy_type, width;
+
+ xe_gt_assert(gt, IS_ALIGNED(size, pitch));
+ xe_gt_assert(gt, pitch <= U16_MAX);
+ xe_gt_assert(gt, pitch);
+ xe_gt_assert(gt, size);
+
+ if (IS_ALIGNED(size, PAGE_COPY_MODE_PS) &&
+ IS_ALIGNED(lower_32_bits(src_ofs), PAGE_COPY_MODE_PS) &&
+ IS_ALIGNED(lower_32_bits(dst_ofs), PAGE_COPY_MODE_PS)) {
+ mode = MEM_COPY_PAGE_COPY_MODE;
+ copy_type = 0; /* linear copy */
+ width = size / PAGE_COPY_MODE_PS;
+ } else if (pitch > 1) {
+ xe_gt_assert(gt, size / pitch <= U16_MAX);
+ mode = 0; /* BYTE_COPY */
+ copy_type = MEM_COPY_MATRIX_COPY;
+ width = pitch;
+ } else {
+ mode = 0; /* BYTE_COPY */
+ copy_type = 0; /* linear copy */
+ width = size;
+ }
+
+ xe_gt_assert(gt, width <= U16_MAX);
+
+ bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type;
+ bb->cs[bb->len++] = width - 1;
+ bb->cs[bb->len++] = size / pitch - 1; /* ignored by hw for page-copy/linear above */
+ bb->cs[bb->len++] = pitch - 1;
+ bb->cs[bb->len++] = pitch - 1;
+ bb->cs[bb->len++] = lower_32_bits(src_ofs);
+ bb->cs[bb->len++] = upper_32_bits(src_ofs);
+ bb->cs[bb->len++] = lower_32_bits(dst_ofs);
+ bb->cs[bb->len++] = upper_32_bits(dst_ofs);
+ bb->cs[bb->len++] = FIELD_PREP(MEM_COPY_SRC_MOCS_INDEX_MASK, gt->mocs.uc_index) |
+ FIELD_PREP(MEM_COPY_DST_MOCS_INDEX_MASK, gt->mocs.uc_index);
+}
+
+static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
+ u64 src_ofs, u64 dst_ofs, unsigned int size,
+ unsigned int pitch)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe->info.has_mem_copy_instr)
+ emit_mem_copy(gt, bb, src_ofs, dst_ofs, size, pitch);
+ else
+ emit_xy_fast_copy(gt, bb, src_ofs, dst_ofs, size, pitch);
+}
+
static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
{
return usm ? m->usm_batch_base_ofs : m->batch_base_ofs;
.has_display = true,
.has_flat_ccs = 1,
.has_pxp = true,
+ .has_mem_copy_instr = true,
.max_gt_per_tile = 2,
.needs_scratch = true,
.va_bits = 48,
.has_heci_cscfi = 1,
.has_late_bind = true,
.has_sriov = true,
+ .has_mem_copy_instr = true,
.max_gt_per_tile = 2,
.needs_scratch = true,
.subplatforms = (const struct xe_subplatform_desc[]) {
.has_display = true,
.has_flat_ccs = 1,
.has_sriov = true,
+ .has_mem_copy_instr = true,
.max_gt_per_tile = 2,
.needs_scratch = true,
.needs_shared_vf_gt_wq = true,
.dma_mask_size = 46,
.has_display = true,
.has_flat_ccs = 1,
+ .has_mem_copy_instr = true,
.max_gt_per_tile = 2,
.require_force_probe = true,
.va_bits = 48,
xe->info.has_pxp = desc->has_pxp;
xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) &&
desc->has_sriov;
+ xe->info.has_mem_copy_instr = desc->has_mem_copy_instr;
xe->info.skip_guc_pc = desc->skip_guc_pc;
xe->info.skip_mtcfg = desc->skip_mtcfg;
xe->info.skip_pcode = desc->skip_pcode;