git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/msm/a6xx: Append SEL regs to dyn pwrup reglist
author: Rob Clark <robin.clark@oss.qualcomm.com>
Tue, 26 May 2026 14:50:49 +0000 (07:50 -0700)
committer: Rob Clark <robin.clark@oss.qualcomm.com>
Fri, 29 May 2026 14:07:29 +0000 (07:07 -0700)
This is needed so that SEL reg values are restored on exit from IFPC.

Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
Reviewed-by: Anna Maniscalco <anna.maniscalco2000@gmail.com>
Reviewed-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/728218/
Message-ID: <20260526145137.160554-16-robin.clark@oss.qualcomm.com>

drivers/gpu/drm/msm/adreno/a6xx_gpu.c
drivers/gpu/drm/msm/adreno/a6xx_gpu.h
drivers/gpu/drm/msm/adreno/a8xx_gpu.c

index 8613d21cecb5f67bd47a6cbb69a1cd5577bbc5b7..4bb3be3ff9d0e5711f1c348334c70368ae75cbe9 100644 (file)
@@ -869,6 +869,7 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
                          A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE));
        }
        lock->dynamic_list_len = dyn_pwrup_reglist_count;
+       a6xx_gpu->dynamic_sel_reglist_offset = dyn_pwrup_reglist_count;
 }
 
 static int a7xx_preempt_start(struct msm_gpu *gpu)
@@ -2458,11 +2459,60 @@ static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
        return progress;
 }
 
+static void
+perfcntr_select(struct msm_ringbuffer *ring, enum adreno_pipe pipe,
+               uint32_t regidx, uint32_t *countables, uint32_t nr,
+               uint32_t **reglist)
+{
+       OUT_PKT4(ring, regidx, nr);
+       for (unsigned i = 0; i < nr; i++)
+               OUT_RING(ring, countables[i]);
+
+       if (!*reglist)
+               return;
+
+       for (unsigned i = 0; i < nr; i++) {
+               /*
+                * Bitfield is in same position on a7xx, but only 2 bits..
+                * which is sufficient for NONE/BR/BV:
+                */
+               *(*reglist)++ = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe);
+               *(*reglist)++ = regidx + i;
+               *(*reglist)++ = countables[i];
+       }
+}
+
 static void
 a6xx_perfcntr_configure(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
                        const struct msm_perfcntr_stream *stream)
 {
+       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+       struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        enum adreno_pipe pipe = PIPE_NONE;
+       uint32_t *reglist = NULL;
+       uint32_t *reglist_sel_start;
+
+       if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) {
+               WARN_ON(!a6xx_gpu->pwrup_reglist_emitted);
+
+               struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr;
+               int off = (2 * lock->ifpc_list_len) +
+                         (2 * lock->preemption_list_len) +
+                         (3 * a6xx_gpu->dynamic_sel_reglist_offset);
+
+               reglist = (uint32_t *)&lock->regs[0];
+               reglist += off;
+               reglist_sel_start = reglist;
+
+               /* Clear any previously configured SEL reg entries: */
+               lock->dynamic_list_len = a6xx_gpu->dynamic_sel_reglist_offset;
+
+               /*
+                * Ensure CP sees the dynamic_list_len update before we
+                * start modifying the SEL entries:
+                */
+               dma_wmb();
+       }
 
        for (unsigned i = 0; i < stream->nr_groups; i++) {
                unsigned group_idx = msm_perfcntr_group_idx(stream, i);
@@ -2490,17 +2540,15 @@ a6xx_perfcntr_configure(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
 
                const struct msm_perfcntr_counter *counter = &group->counters[base];
                unsigned nr = group_state->allocated_counters;
-               OUT_PKT4(ring, counter->select_reg, nr);
-               for (unsigned c = 0; c < nr; c++)
-                       OUT_RING(ring, group_state->countables[c]);
+               perfcntr_select(ring, pipe, counter->select_reg,
+                               group_state->countables, nr, &reglist);
 
                for (unsigned s = 0; s < ARRAY_SIZE(counter->slice_select_regs); s++) {
                        if (!counter->slice_select_regs[s])
                                break;
 
-                       OUT_PKT4(ring, counter->slice_select_regs[s], nr);
-                       for (unsigned c = 0; c < nr; c++)
-                               OUT_RING(ring, group_state->countables[c]);
+                       perfcntr_select(ring, pipe, counter->slice_select_regs[s],
+                                       group_state->countables, nr, &reglist);
                }
        }
 
@@ -2514,6 +2562,30 @@ a6xx_perfcntr_configure(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
        OUT_RING(ring, upper_32_bits(rbmemptr(ring, perfcntr_fence)));
        OUT_RING(ring, stream->sel_fence);
 
+       /*
+        * Update the pwrup reglist size before flushing.  Kgsl does a shared-
+        * memory spinlock dance with SQE to avoid racing with IFPC exit.  But
+        * we can skip that since the ringbuffer programming will be executed
+        * by SQE after dynamic reglist size is updated.  So even if we lose
+        * the race, the register programming in the rb will overwrite/correct
+        * the SEL regs restored by SQE on IFPC exit, before sampling begins.
+        */
+       if (reglist) {
+               struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr;
+               unsigned nr_regs = (reglist - reglist_sel_start) / 3;
+
+               /*
+                * Ensure CP sees updates to the pwrup_reglist before it
+                * sees the new (increased) length:
+                */
+               dma_wmb();
+
+               /* Update dynamic reglist len to include new SEL reg programming: */
+               lock->dynamic_list_len = a6xx_gpu->dynamic_sel_reglist_offset + nr_regs;
+
+               WARN_ON_ONCE(reglist > (uint32_t *)((uint8_t *)lock + PWRUP_REGLIST_SIZE));
+       }
+
        a6xx_flush_yield(gpu, ring);
 
        /* Check to see if we need to start preemption */
index d3f0b40787db586edd064a808e654b3d2c2062b3..b72fb58bf223470cc2288b4f2f90aee310f249eb 100644 (file)
@@ -21,17 +21,19 @@ struct cpu_gpu_lock {
        uint32_t cpu_req;
        uint32_t turn;
        union {
+               /* a6xx: */
                struct {
                        uint16_t list_length;
                        uint16_t list_offset;
                };
+               /* a7xx+: */
                struct {
                        uint8_t ifpc_list_len;
                        uint8_t preemption_list_len;
                        uint16_t dynamic_list_len;
                };
        };
-       uint64_t regs[62];
+       uint64_t regs[];
 };
 
 /**
@@ -101,6 +103,13 @@ struct a6xx_gpu {
        uint64_t pwrup_reglist_iova;
        bool pwrup_reglist_emitted;
 
+       /*
+        * Offset of start of SEL regs appended to pwrup_reglist.  This
+        * is equal to lock->dynamic_list_len if no SEL regs are appended
+        * to the end of the dynamic reglist.
+        */
+       uint16_t dynamic_sel_reglist_offset;
+
        bool has_whereami;
 
        void __iomem *llc_mmio;
index 3adf250305485374a34f8d6968ff70068d4043f8..abe100d9aeaa8a570d6f5faab793e8ca70f43080 100644 (file)
@@ -464,6 +464,7 @@ static void a8xx_patch_pwrup_reglist(struct msm_gpu *gpu)
        }
 
        lock->dynamic_list_len = dyn_pwrup_reglist_count;
+       a6xx_gpu->dynamic_sel_reglist_offset = dyn_pwrup_reglist_count;
 
 done:
        a8xx_aperture_clear(gpu);