git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/radeon: fix r100_copy_blit for large BOs
author: Pavel Ondračka <pavel.ondracka@gmail.com>
Wed, 10 Jun 2026 08:32:45 +0000 (10:32 +0200)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 17 Jun 2026 22:19:00 +0000 (18:19 -0400)
r100_copy_blit() copies BOs as 1024-pixel-wide ARGB8888 blits, so one
GPU page becomes one blit row. Large copies are split into chunks of at
most 8191 rows.

The kernel register header names the packet coordinate dwords SRC_Y_X
and DST_Y_X. In the BITBLT_MULTI description in the
R5xx_Acceleration_v1.5.pdf documentation, these correspond to [SRC_X1 | SRC_Y1]
and [DST_X1 | DST_Y1], which are signed 13-bit coordinates in the
-8192..8191 range. The old code kept SRC/DST_PITCH_OFFSET at the BO base
and used SRC_Y_X/DST_Y_X as the chunk address, so large BO moves could
exceed that coordinate range.

Compute per-chunk SRC/DST_PITCH_OFFSET bases and emit zero source and
destination coordinates. r100_copy_blit() already packs
SRC/DST_PITCH_OFFSET as pitch plus base offset, so large chunk addresses
belong there rather than in the coordinate fields.

This fixes Prison Architect corruption with 4096x4096 mipped textures
after they are evicted to GTT under memory pressure on RV530.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/6716
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 87be26aee76239c6da03e599f238a426897f78ad)
Cc: stable@vger.kernel.org
drivers/gpu/drm/radeon/r100.c

index 3ac1a79b6f13f804f8490d4f20598cc8fe46836c..533215d6e9cb411175e1265911878ca3fe02701c 100644 (file)
@@ -906,6 +906,7 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
 {
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        struct radeon_fence *fence;
+       uint64_t cur_src_offset, cur_dst_offset;
        uint32_t cur_pages;
        uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
        uint32_t pitch;
@@ -934,6 +935,10 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
                        cur_pages = 8191;
                }
                num_gpu_pages -= cur_pages;
+               cur_src_offset = src_offset +
+                       (uint64_t)num_gpu_pages * RADEON_GPU_PAGE_SIZE;
+               cur_dst_offset = dst_offset +
+                       (uint64_t)num_gpu_pages * RADEON_GPU_PAGE_SIZE;
 
                /* pages are in Y direction - height
                   page width in X direction - width */
@@ -950,13 +955,13 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
                                  RADEON_DP_SRC_SOURCE_MEMORY |
                                  RADEON_GMC_CLR_CMP_CNTL_DIS |
                                  RADEON_GMC_WR_MSK_DIS);
-               radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
-               radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
+               radeon_ring_write(ring, (pitch << 22) | (cur_src_offset >> 10));
+               radeon_ring_write(ring, (pitch << 22) | (cur_dst_offset >> 10));
                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
                radeon_ring_write(ring, 0);
                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
-               radeon_ring_write(ring, num_gpu_pages);
-               radeon_ring_write(ring, num_gpu_pages);
+               radeon_ring_write(ring, 0);
+               radeon_ring_write(ring, 0);
                radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
        }
        radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));