--- /dev/null
+From 831eff09d59335f8b75af757fe30bb57453335e3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Sep 2022 18:43:39 -0400
+Subject: drm/amd/display: Add debug option for allocating extra way for cursor
+
+From: Alvin Lee <Alvin.Lee2@amd.com>
+
+[ Upstream commit 6eef37460584269b240f45aa47ebb61aae848082 ]
+
+[Why and How]
+- Add a debug option for allocating extra way for cursor
+- Remove usage of cache_cursor_addr since it's not guaranteed
+ to be populated
+- Include cursor size in MALL calculation if it exceeds the
+ DCN cursor buffer size (and don't need extra way for cursor)
+
+Reviewed-by: Aurabindo Pillai <Aurabindo.Pillai@amd.com>
+Acked-by: Wayne Lin <wayne.lin@amd.com>
+Signed-off-by: Alvin Lee <Alvin.Lee2@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 4d2852412306 ("drm/amd/display: Fix calculation for cursor CAB allocation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/dc.h | 1 +
+ drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 10 ++++++----
+ drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 +
+ .../gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 1 +
+ 4 files changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
+index a652dec5d02f..0d4340f0f688 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc.h
++++ b/drivers/gpu/drm/amd/display/dc/dc.h
+@@ -747,6 +747,7 @@ struct dc_debug_options {
+ bool force_subvp_mclk_switch;
+ bool allow_sw_cursor_fallback;
+ unsigned int force_subvp_num_ways;
++ bool alloc_extra_way_for_cursor;
+ bool force_usr_allow;
+ /* uses value at boot and disables switch */
+ bool disable_dtb_ref_clk_switch;
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+index c72166e096ba..0751e1202c95 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+@@ -304,7 +304,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
+ * using the max for calculation
+ */
+ if (hubp->curs_attr.width > 0) {
+- cursor_size = hubp->curs_attr.width * hubp->curs_attr.height;
++ // Round cursor width to next multiple of 64
++ cursor_size = (((hubp->curs_attr.width + 63) / 64) * 64) * hubp->curs_attr.height;
+ break;
+ }
+ }
+@@ -325,7 +326,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
+ break;
+ }
+
+- if (stream->cursor_position.enable && plane->address.grph.cursor_cache_addr.quad_part) {
++ if (stream->cursor_position.enable && !dc->debug.alloc_extra_way_for_cursor &&
++ cursor_size > 16384) {
+ cache_lines_used += dcn32_cache_lines_for_surface(dc, cursor_size,
+ plane->address.grph.cursor_cache_addr.quad_part);
+ }
+@@ -345,8 +347,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
+ plane = ctx->stream_status[i].plane_states[j];
+
+ if (stream->cursor_position.enable && plane &&
+- !plane->address.grph.cursor_cache_addr.quad_part &&
+- cursor_size > 16384) {
++ dc->debug.alloc_extra_way_for_cursor &&
++ cursor_size > 16384) {
+ /* Cursor caching is not supported since it won't be on the same line.
+ * So we need an extra line to accommodate it. With large cursors and a single 4k monitor
+ * this case triggers corruption. If we're at the edge, then dont trigger display refresh
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+index c3b783cea8a0..6f1bcb45a3b2 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+@@ -872,6 +872,7 @@ static const struct dc_debug_options debug_defaults_drv = {
+ .enable_single_display_2to1_odm_policy = true,
+ .enable_dp_dig_pixel_rate_div_policy = 1,
+ .allow_sw_cursor_fallback = false,
++ .alloc_extra_way_for_cursor = true,
+ };
+
+ static const struct dc_debug_options debug_defaults_diags = {
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+index 7309eed33a61..d074716dc197 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+@@ -873,6 +873,7 @@ static const struct dc_debug_options debug_defaults_drv = {
+ .enable_single_display_2to1_odm_policy = true,
+ .enable_dp_dig_pixel_rate_div_policy = 1,
+ .allow_sw_cursor_fallback = false,
++ .alloc_extra_way_for_cursor = true,
+ };
+
+ static const struct dc_debug_options debug_defaults_diags = {
+--
+2.35.1
+
--- /dev/null
+From a58e8725e86ad0f437659d244bcc096f100bc475 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Aug 2022 16:05:03 -0400
+Subject: drm/amd/display: Added debug option for forcing subvp num ways
+
+From: Lee, Alvin <Alvin.Lee2@amd.com>
+
+[ Upstream commit 5c1a431aaf52bbba8b6e2c4e9b4037a09509c0e3 ]
+
+[Description]
+Regkey option for forcing num ways for subvp for debug purposes
+
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Reviewed-by: Jun Lei <Jun.Lei@amd.com>
+Acked-by: Pavle Kotarac <Pavle.Kotarac@amd.com>
+Signed-off-by: Alvin Lee <Alvin.Lee2@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 4d2852412306 ("drm/amd/display: Fix calculation for cursor CAB allocation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/dc.h | 1 +
+ .../drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 11 ++++++++---
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
+index dbf8158b832e..a652dec5d02f 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc.h
++++ b/drivers/gpu/drm/amd/display/dc/dc.h
+@@ -746,6 +746,7 @@ struct dc_debug_options {
+ bool force_disable_subvp;
+ bool force_subvp_mclk_switch;
+ bool allow_sw_cursor_fallback;
++ unsigned int force_subvp_num_ways;
+ bool force_usr_allow;
+ /* uses value at boot and disables switch */
+ bool disable_dtb_ref_clk_switch;
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+index 13cd1f2e50ca..7c37575d69c7 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+@@ -54,13 +54,14 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
+ uint32_t num_mblks = 0;
+ uint32_t cache_lines_per_plane = 0;
+ uint32_t i = 0, j = 0;
+- uint32_t mblk_width = 0;
+- uint32_t mblk_height = 0;
++ uint16_t mblk_width = 0;
++ uint16_t mblk_height = 0;
+ uint32_t full_vp_width_blk_aligned = 0;
+ uint32_t full_vp_height_blk_aligned = 0;
+ uint32_t mall_alloc_width_blk_aligned = 0;
+ uint32_t mall_alloc_height_blk_aligned = 0;
+- uint32_t full_vp_height = 0;
++ uint16_t full_vp_height = 0;
++ bool subvp_in_use = false;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+@@ -70,6 +71,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
+ pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ struct pipe_ctx *main_pipe = NULL;
+
++ subvp_in_use = true;
+ /* Get full viewport height from main pipe (required for MBLK calculation) */
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ main_pipe = &context->res_ctx.pipe_ctx[j];
+@@ -129,6 +131,9 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
+ if (cache_lines_used % lines_per_way > 0)
+ num_ways++;
+
++ if (subvp_in_use && dc->debug.force_subvp_num_ways > 0)
++ num_ways = dc->debug.force_subvp_num_ways;
++
+ return num_ways;
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 697930fe9ee20c5a9162e17ed49fba6ad1319195 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Nov 2022 23:03:03 -0400
+Subject: drm/amd/display: Fix calculation for cursor CAB allocation
+
+From: George Shen <george.shen@amd.com>
+
+[ Upstream commit 4d285241230676ba8b888701b89684b4e0360fcc ]
+
+[Why]
+The cursor size (in memory) is currently incorrectly calculated,
+resulting in not enough CAB being allocated for static screen cursor
+in MALL refresh. This results in cursor image corruption.
+
+[How]
+Use cursor pitch instead of cursor width when calculating cursor size.
+Update num cache lines calculation to use the result of the cursor size
+calculation instead of manually recalculating again.
+
+Reviewed-by: Alvin Lee <Alvin.Lee2@amd.com>
+Acked-by: Tom Chung <chiahsuan.chung@amd.com>
+Signed-off-by: George Shen <george.shen@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.0.x
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+index 84a20ce9bd36..bbc0bfbec6c4 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+@@ -284,8 +284,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
+ using the max for calculation */
+
+ if (hubp->curs_attr.width > 0) {
+- // Round cursor width to next multiple of 64
+- cursor_size = (((hubp->curs_attr.width + 63) / 64) * 64) * hubp->curs_attr.height;
++ cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height;
+
+ switch (pipe->stream->cursor_attributes.color_format) {
+ case CURSOR_MODE_MONO:
+@@ -310,9 +309,9 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
+ cursor_size > 16384) {
+ /* cursor_num_mblk = CEILING(num_cursors*cursor_width*cursor_width*cursor_Bpe/mblk_bytes, 1)
+ */
+- cache_lines_used += (((hubp->curs_attr.width * hubp->curs_attr.height * cursor_bpp +
+- DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES) *
+- DCN3_2_MALL_MBLK_SIZE_BYTES) / dc->caps.cache_line_size + 2;
++ cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) /
++ DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) /
++ dc->caps.cache_line_size + 2;
+ }
+ break;
+ }
+@@ -730,10 +729,7 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context)
+ struct hubp *hubp = pipe->plane_res.hubp;
+
+ if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) {
+- //Round cursor width up to next multiple of 64
+- int cursor_width = ((hubp->curs_attr.width + 63) / 64) * 64;
+- int cursor_height = hubp->curs_attr.height;
+- int cursor_size = cursor_width * cursor_height;
++ int cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height;
+
+ switch (hubp->curs_attr.color_format) {
+ case CURSOR_MODE_MONO:
+--
+2.35.1
+
--- /dev/null
+From 7eab1a97cc2847068540336209a15956ac139e32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Sep 2022 11:05:17 -0400
+Subject: drm/amd/display: Update MALL SS NumWays calculation
+
+From: Alvin Lee <Alvin.Lee2@amd.com>
+
+[ Upstream commit 525a65c77db51cf5d6c6d8e3f8d07efeb2270416 ]
+
+[Description]
+Update MALL SS NumWays calculation according
+to programming guide.
+
+Reviewed-by: Jun Lei <Jun.Lei@amd.com>
+Acked-by: Jasdeep Dhillon <jdhillon@amd.com>
+Signed-off-by: Alvin Lee <Alvin.Lee2@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 4d2852412306 ("drm/amd/display: Fix calculation for cursor CAB allocation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/dc.h | 1 +
+ .../drm/amd/display/dc/dcn32/dcn32_hwseq.c | 207 ++++++++----------
+ 2 files changed, 98 insertions(+), 110 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
+index 0d4340f0f688..fcddf60d3c10 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc.h
++++ b/drivers/gpu/drm/amd/display/dc/dc.h
+@@ -747,6 +747,7 @@ struct dc_debug_options {
+ bool force_subvp_mclk_switch;
+ bool allow_sw_cursor_fallback;
+ unsigned int force_subvp_num_ways;
++ unsigned int force_mall_ss_num_ways;
+ bool alloc_extra_way_for_cursor;
+ bool force_usr_allow;
+ /* uses value at boot and disables switch */
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+index 0751e1202c95..84a20ce9bd36 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+@@ -49,6 +49,7 @@
+ #include "dcn20/dcn20_optc.h"
+ #include "dmub_subvp_state.h"
+ #include "dce/dmub_hw_lock_mgr.h"
++#include "dcn32_resource.h"
+ #include "dc_link_dp.h"
+ #include "dmub/inc/dmub_subvp_state.h"
+
+@@ -198,42 +199,6 @@ static bool dcn32_check_no_memory_request_for_cab(struct dc *dc)
+ return false;
+ }
+
+-/* This function takes in the start address and surface size to be cached in CAB
+- * and calculates the total number of cache lines required to store the surface.
+- * The number of cache lines used for each surface is calculated independently of
+- * one another. For example, if there is a primary surface(1), meta surface(2), and
+- * cursor(3), this function should be called 3 times to calculate the number of cache
+- * lines used for each of those surfaces.
+- */
+-static uint32_t dcn32_cache_lines_for_surface(struct dc *dc, uint32_t surface_size, uint64_t start_address)
+-{
+- uint32_t lines_used = 1;
+- uint32_t num_cached_bytes = 0;
+- uint32_t remaining_size = 0;
+- uint32_t cache_line_size = dc->caps.cache_line_size;
+- uint32_t remainder = 0;
+-
+- /* 1. Calculate surface size minus the number of bytes stored
+- * in the first cache line (all bytes in first cache line might
+- * not be fully used).
+- */
+- div_u64_rem(start_address, cache_line_size, &remainder);
+- num_cached_bytes = cache_line_size - remainder;
+- remaining_size = surface_size - num_cached_bytes;
+-
+- /* 2. Calculate number of cache lines that will be fully used with
+- * the remaining number of bytes to be stored.
+- */
+- lines_used += (remaining_size / cache_line_size);
+-
+- /* 3. Check if we need an extra line due to the remaining size not being
+- * a multiple of CACHE_LINE_SIZE.
+- */
+- if (remaining_size % cache_line_size > 0)
+- lines_used++;
+-
+- return lines_used;
+-}
+
+ /* This function loops through every surface that needs to be cached in CAB for SS,
+ * and calculates the total number of ways required to store all surfaces (primary,
+@@ -241,96 +206,116 @@ static uint32_t dcn32_cache_lines_for_surface(struct dc *dc, uint32_t surface_si
+ */
+ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *ctx)
+ {
+- uint8_t i, j;
++ uint8_t i;
++ int j;
+ struct dc_stream_state *stream = NULL;
+ struct dc_plane_state *plane = NULL;
+- uint32_t surface_size = 0;
+ uint32_t cursor_size = 0;
+- uint32_t cache_lines_used = 0;
+ uint32_t total_lines = 0;
+ uint32_t lines_per_way = 0;
+- uint32_t num_ways = 0;
+- uint32_t prev_addr_low = 0;
++ uint8_t num_ways = 0;
++ uint8_t bytes_per_pixel = 0;
++ uint8_t cursor_bpp = 0;
++ uint16_t mblk_width = 0;
++ uint16_t mblk_height = 0;
++ uint16_t mall_alloc_width_blk_aligned = 0;
++ uint16_t mall_alloc_height_blk_aligned = 0;
++ uint16_t num_mblks = 0;
++ uint32_t bytes_in_mall = 0;
++ uint32_t cache_lines_used = 0;
++ uint32_t cache_lines_per_plane = 0;
+
+- for (i = 0; i < ctx->stream_count; i++) {
+- stream = ctx->streams[i];
++ for (i = 0; i < dc->res_pool->pipe_count; i++) {
++ struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+- // Don't include PSR surface in the total surface size for CAB allocation
+- if (stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED)
++ if (!pipe->stream || !pipe->plane_state ||
++ pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED ||
++ pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
+ continue;
+
+- if (ctx->stream_status[i].plane_count == 0)
+- continue;
++ bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
++ mblk_width = DCN3_2_MBLK_WIDTH;
++ mblk_height = bytes_per_pixel == 4 ? DCN3_2_MBLK_HEIGHT_4BPE : DCN3_2_MBLK_HEIGHT_8BPE;
+
+- // For each stream, loop through each plane to calculate the number of cache
+- // lines required to store the surface in CAB
+- for (j = 0; j < ctx->stream_status[i].plane_count; j++) {
+- plane = ctx->stream_status[i].plane_states[j];
++ /* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) -
++ * FLOOR(vp_x_start, blk_width)
++ *
++ * mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c
++ */
++ mall_alloc_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
++ pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) +
++ (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
++
++ /* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) -
++ * FLOOR(vp_y_start, blk_height)
++ *
++ * mall_alloc_height_blk_aligned_l/c = full_vp_height_blk_aligned_l/c
++ */
++ mall_alloc_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y +
++ pipe->plane_res.scl_data.viewport.height + mblk_height - 1) / mblk_height * mblk_height) +
++ (pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);
+
+- // Calculate total surface size
+- if (prev_addr_low != plane->address.grph.addr.u.low_part) {
+- /* if plane address are different from prev FB, then userspace allocated separate FBs*/
+- surface_size += plane->plane_size.surface_pitch *
+- plane->plane_size.surface_size.height *
+- (plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4);
++ num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
++ ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
+
+- prev_addr_low = plane->address.grph.addr.u.low_part;
+- } else {
+- /* We have the same fb for all the planes.
+- * Xorg always creates one giant fb that holds all surfaces,
+- * so allocating it once is sufficient.
+- * */
+- continue;
+- }
+- // Convert surface size + starting address to number of cache lines required
+- // (alignment accounted for)
+- cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size,
+- plane->address.grph.addr.quad_part);
+-
+- if (plane->address.grph.meta_addr.quad_part) {
+- // Meta surface
+- cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size,
+- plane->address.grph.meta_addr.quad_part);
+- }
+- }
++ /* For DCC:
++ * meta_num_mblk = CEILING(full_mblk_width_ub_l*full_mblk_height_ub_l*Bpe/256/mblk_bytes, 1)
++ */
++ if (pipe->plane_state->dcc.enable)
++ num_mblks += (mall_alloc_width_blk_aligned * mall_alloc_width_blk_aligned * bytes_per_pixel +
++ (256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
+
+- // Include cursor size for CAB allocation
+- for (j = 0; j < dc->res_pool->pipe_count; j++) {
+- struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[j];
+- struct hubp *hubp = pipe->plane_res.hubp;
++ bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
+
+- if (pipe->stream && pipe->plane_state && hubp)
+- /* Find the cursor plane and use the exact size instead of
+- * using the max for calculation
+- */
+- if (hubp->curs_attr.width > 0) {
+- // Round cursor width to next multiple of 64
+- cursor_size = (((hubp->curs_attr.width + 63) / 64) * 64) * hubp->curs_attr.height;
+- break;
+- }
+- }
++ /* (cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
++ * (MALL is 64-byte aligned)
++ */
++ cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2;
++ cache_lines_used += cache_lines_per_plane;
++ }
+
+- switch (stream->cursor_attributes.color_format) {
+- case CURSOR_MODE_MONO:
+- cursor_size /= 2;
+- break;
+- case CURSOR_MODE_COLOR_1BIT_AND:
+- case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
+- case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
+- cursor_size *= 4;
+- break;
++ // Include cursor size for CAB allocation
++ for (j = 0; j < dc->res_pool->pipe_count; j++) {
++ struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[j];
++ struct hubp *hubp = pipe->plane_res.hubp;
+
+- case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
+- case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
+- cursor_size *= 8;
+- break;
+- }
++ if (pipe->stream && pipe->plane_state && hubp)
++ /* Find the cursor plane and use the exact size instead of
++ using the max for calculation */
+
+- if (stream->cursor_position.enable && !dc->debug.alloc_extra_way_for_cursor &&
+- cursor_size > 16384) {
+- cache_lines_used += dcn32_cache_lines_for_surface(dc, cursor_size,
+- plane->address.grph.cursor_cache_addr.quad_part);
+- }
++ if (hubp->curs_attr.width > 0) {
++ // Round cursor width to next multiple of 64
++ cursor_size = (((hubp->curs_attr.width + 63) / 64) * 64) * hubp->curs_attr.height;
++
++ switch (pipe->stream->cursor_attributes.color_format) {
++ case CURSOR_MODE_MONO:
++ cursor_size /= 2;
++ cursor_bpp = 4;
++ break;
++ case CURSOR_MODE_COLOR_1BIT_AND:
++ case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
++ case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
++ cursor_size *= 4;
++ cursor_bpp = 4;
++ break;
++
++ case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
++ case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
++ cursor_size *= 8;
++ cursor_bpp = 8;
++ break;
++ }
++
++ if (pipe->stream->cursor_position.enable && !dc->debug.alloc_extra_way_for_cursor &&
++ cursor_size > 16384) {
++ /* cursor_num_mblk = CEILING(num_cursors*cursor_width*cursor_width*cursor_Bpe/mblk_bytes, 1)
++ */
++ cache_lines_used += (((hubp->curs_attr.width * hubp->curs_attr.height * cursor_bpp +
++ DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES) *
++ DCN3_2_MALL_MBLK_SIZE_BYTES) / dc->caps.cache_line_size + 2;
++ }
++ break;
++ }
+ }
+
+ // Convert number of cache lines required to number of ways
+@@ -360,7 +345,9 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
+ }
+ }
+ }
+-
++ if (dc->debug.force_mall_ss_num_ways > 0) {
++ num_ways = dc->debug.force_mall_ss_num_ways;
++ }
+ return num_ways;
+ }
+
+--
+2.35.1
+
nios2-add-force-for-vmlinuz.gz.patch
drm-amdgpu-enable-sa-software-trap.patch
drm-amdkfd-update-gfx11-cwsr-trap-handler.patch
+drm-amd-display-added-debug-option-for-forcing-subvp.patch
+drm-amd-display-add-debug-option-for-allocating-extr.patch
+drm-amd-display-update-mall-ss-numways-calculation.patch
+drm-amd-display-fix-calculation-for-cursor-cab-alloc.patch
+usb-dwc3-gadget-conditionally-remove-requests.patch
+usb-dwc3-gadget-return-eshutdown-on-ep-disable.patch
+usb-dwc3-gadget-clear-ep-descriptor-last.patch
--- /dev/null
+From 7dcd033aa86989356f3995d4d7c7c1d3e4dded87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Nov 2022 17:19:43 -0800
+Subject: usb: dwc3: gadget: Clear ep descriptor last
+
+From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
+
+[ Upstream commit f90f5afd5083a7cb4aee13bd4cc0ae600bd381ca ]
+
+Until the endpoint is disabled, its descriptors should remain valid.
+When its requests are removed from ep disable, the request completion
+routine may attempt to access the endpoint's descriptor. Don't clear the
+descriptors before that.
+
+Fixes: f09ddcfcb8c5 ("usb: dwc3: gadget: Prevent EP queuing while stopping transfers")
+Cc: stable@vger.kernel.org
+Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
+Link: https://lore.kernel.org/r/45db7c83b209259115bf652af210f8b2b3b1a383.1668561364.git.Thinh.Nguyen@synopsys.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/gadget.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index 448c8e6bc99d..6f61a288073b 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -1016,18 +1016,18 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)
+ reg &= ~DWC3_DALEPENA_EP(dep->number);
+ dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);
+
+- /* Clear out the ep descriptors for non-ep0 */
+- if (dep->number > 1) {
+- dep->endpoint.comp_desc = NULL;
+- dep->endpoint.desc = NULL;
+- }
+-
+ dwc3_remove_requests(dwc, dep, -ESHUTDOWN);
+
+ dep->stream_capable = false;
+ dep->type = 0;
+ dep->flags &= DWC3_EP_TXFIFO_RESIZED;
+
++ /* Clear out the ep descriptors for non-ep0 */
++ if (dep->number > 1) {
++ dep->endpoint.comp_desc = NULL;
++ dep->endpoint.desc = NULL;
++ }
++
+ return 0;
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 50ec22377141bcf9118c915dfcb5f5da6e407446 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 23:35:23 +0200
+Subject: usb: dwc3: gadget: conditionally remove requests
+
+From: Michael Grzeschik <m.grzeschik@pengutronix.de>
+
+[ Upstream commit b44c0e7fef51ee7e8ca8c6efbf706f5613787100 ]
+
+The functions stop_active_transfers and ep_disable both call
+remove_requests. In both cases this function gives back the requests
+with status ESHUTDOWN, which also represents a physical disconnection.
+For ep_disable this is not true. This patch adds the status parameter to
+remove_requests and sets the status to ECONNRESET on ep_disable.
+
+Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
+Link: https://lore.kernel.org/r/20220720213523.1055897-1-m.grzeschik@pengutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: f90f5afd5083 ("usb: dwc3: gadget: Clear ep descriptor last")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/gadget.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index 0ed9826a4c47..ffff6f41d2ac 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -965,7 +965,7 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, unsigned int action)
+ return 0;
+ }
+
+-static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep)
++static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep, int status)
+ {
+ struct dwc3_request *req;
+
+@@ -975,19 +975,19 @@ static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep)
+ while (!list_empty(&dep->started_list)) {
+ req = next_request(&dep->started_list);
+
+- dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
++ dwc3_gadget_giveback(dep, req, status);
+ }
+
+ while (!list_empty(&dep->pending_list)) {
+ req = next_request(&dep->pending_list);
+
+- dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
++ dwc3_gadget_giveback(dep, req, status);
+ }
+
+ while (!list_empty(&dep->cancelled_list)) {
+ req = next_request(&dep->cancelled_list);
+
+- dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
++ dwc3_gadget_giveback(dep, req, status);
+ }
+ }
+
+@@ -1022,7 +1022,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)
+ dep->endpoint.desc = NULL;
+ }
+
+- dwc3_remove_requests(dwc, dep);
++ dwc3_remove_requests(dwc, dep, -ECONNRESET);
+
+ dep->stream_capable = false;
+ dep->type = 0;
+@@ -2350,7 +2350,7 @@ static void dwc3_stop_active_transfers(struct dwc3 *dwc)
+ if (!dep)
+ continue;
+
+- dwc3_remove_requests(dwc, dep);
++ dwc3_remove_requests(dwc, dep, -ESHUTDOWN);
+ }
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 8ec87b7092f471ffe3d9cbacedb32cc8e0b82820 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Nov 2022 18:45:44 -0800
+Subject: usb: dwc3: gadget: Return -ESHUTDOWN on ep disable
+
+From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
+
+[ Upstream commit ffb9da4a04c69567bad717707b6fdfbc4c216ef4 ]
+
+The usb_request API clearly notes that requests removed due to a disabled
+endpoint should be returned with -ESHUTDOWN status. Don't change this
+behavior.
+
+Fixes: b44c0e7fef51 ("usb: dwc3: gadget: conditionally remove requests")
+Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
+Link: https://lore.kernel.org/r/3421859485cb32d77e2068549679a6c07a7797bc.1667875427.git.Thinh.Nguyen@synopsys.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: f90f5afd5083 ("usb: dwc3: gadget: Clear ep descriptor last")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/gadget.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index ffff6f41d2ac..448c8e6bc99d 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -1022,7 +1022,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)
+ dep->endpoint.desc = NULL;
+ }
+
+- dwc3_remove_requests(dwc, dep, -ECONNRESET);
++ dwc3_remove_requests(dwc, dep, -ESHUTDOWN);
+
+ dep->stream_capable = false;
+ dep->type = 0;
+--
+2.35.1
+