From: Austin Zheng Date: Fri, 3 Oct 2025 14:39:49 +0000 (-0400) Subject: drm/amd/display: Refactor VActive implementation X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1b3246352af8761f00c98f4ee9502e91634c33ed;p=thirdparty%2Fkernel%2Flinux.git drm/amd/display: Refactor VActive implementation [Why & How] Refactors VActive accounting in PMO, and breaks down fill time requirement by P-State type as it can result in drastically different bandwidth requirements depending on the blackout length. Reviewed-by: Dillon Varone Signed-off-by: Austin Zheng Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h index da8e5c8b22446..35aa954248cdc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h @@ -87,6 +87,15 @@ enum dml2_output_link_dp_rate { dml2_dp_rate_uhbr20 = 6 }; +enum dml2_pstate_type { + dml2_pstate_type_uclk = 0, + dml2_pstate_type_fclk = 1, + dml2_pstate_type_ppt = 2, + dml2_pstate_type_temp_read = 3, + dml2_pstate_type_dummy_pstate = 4, + dml2_pstate_type_count = 5 +}; + enum dml2_uclk_pstate_change_strategy { dml2_uclk_pstate_change_strategy_auto = 0, dml2_uclk_pstate_change_strategy_force_vactive = 1, @@ -393,8 +402,7 @@ struct dml2_plane_parameters { // reserved_vblank_time_ns is the minimum time to reserve in vblank for Twait // The actual reserved vblank time used for the corresponding stream in mode_programming would be at least as much as this per-plane override. 
long reserved_vblank_time_ns; - unsigned int max_vactive_det_fill_delay_us; // 0 = no reserved time, +ve = explicit max delay - unsigned int vactive_latency_to_hide_for_pstate_admissibility_us; + unsigned int max_vactive_det_fill_delay_us[dml2_pstate_type_count]; // 0 = no reserved time, +ve = explicit max delay unsigned int gpuvm_min_page_size_kbytes; unsigned int hostvm_min_page_size_kbytes; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h index e87d04a734b51..1fbc520c25404 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h @@ -191,7 +191,7 @@ struct dml2_ip_capabilities { unsigned int subvp_prefetch_end_to_mall_start_us; unsigned int subvp_fw_processing_delay; unsigned int max_vactive_det_fill_delay_us; - unsigned int ppt_max_allow_delay_ns; + unsigned int ppt_max_allow_delay_us; unsigned int temp_read_max_allow_delay_us; unsigned int dummy_pstate_max_allow_delay_us; /* FAMS2 delays */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h index 8646ce5f1c01f..d2584b00a19c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h @@ -195,14 +195,6 @@ struct dml2_mcache_surface_allocation { } informative; }; -enum dml2_pstate_type { - dml2_pstate_type_uclk, - dml2_pstate_type_ppt, - dml2_pstate_type_temp_read, - dml2_pstate_type_dummy_pstate, - dml2_pstate_type_count -}; - enum dml2_pstate_method { dml2_pstate_method_na = 0, /* hw exclusive modes */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 
e7a0f46e12898..df81bd963bb88 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -6972,7 +6972,7 @@ static void calculate_bytes_to_fetch_required_to_hide_latency( stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index; - dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us / + dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us[0] / ((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total / (double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0)); @@ -7069,9 +7069,9 @@ static void calculate_excess_vactive_bandwidth_required( excess_vactive_fill_bw_l[plane_index] = 0.0; excess_vactive_fill_bw_c[plane_index] = 0.0; - if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us > 0) { - excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us; - excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us; + if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk] > 0) { + excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk]; + excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk]; } } } @@ -9051,11 +9051,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = 
mode_lib->ms.SwathWidthC; calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY; calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC; - calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; /* outputs */ - calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l; - calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c; + calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk]; + calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk]; calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params); @@ -9063,8 +9063,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_excess_vactive_bandwidth_required( display_cfg, mode_lib->ms.num_active_planes, - s->pstate_bytes_required_l, - s->pstate_bytes_required_c, + s->pstate_bytes_required_l[dml2_pstate_type_uclk], + s->pstate_bytes_required_c[dml2_pstate_type_uclk], /* outputs */ mode_lib->ms.excess_vactive_fill_bw_l, mode_lib->ms.excess_vactive_fill_bw_c); @@ -9506,8 +9506,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_vactive_det_fill_latency( display_cfg, mode_lib->ms.num_active_planes, - s->pstate_bytes_required_l, - s->pstate_bytes_required_c, + s->pstate_bytes_required_l[dml2_pstate_type_uclk], + s->pstate_bytes_required_c[dml2_pstate_type_uclk], mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, 
mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, mode_lib->ms.vactive_sw_bw_l, @@ -9515,7 +9515,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.surface_avg_vactive_required_bw, mode_lib->ms.surface_peak_required_bw, /* outputs */ - mode_lib->ms.dram_change_vactive_det_fill_delay_us); + mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk]); #ifdef __DML_VBA_DEBUG__ DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); @@ -11009,11 +11009,11 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC; calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY; calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC; - calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; /* outputs */ - calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l; - calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c; + calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk]; + calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk]; calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params); @@ -11021,8 +11021,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex 
calculate_excess_vactive_bandwidth_required( display_cfg, s->num_active_planes, - s->pstate_bytes_required_l, - s->pstate_bytes_required_c, + s->pstate_bytes_required_l[dml2_pstate_type_uclk], + s->pstate_bytes_required_c[dml2_pstate_type_uclk], /* outputs */ mode_lib->mp.excess_vactive_fill_bw_l, mode_lib->mp.excess_vactive_fill_bw_c); @@ -12943,7 +12943,8 @@ void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *displ out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx]; - out->dram_change_vactive_det_fill_delay_us = (unsigned int)math_ceil(mode_lib->ms.dram_change_vactive_det_fill_delay_us[plane_idx]); + out->vactive_det_fill_delay_us[dml2_pstate_type_uclk] = + (unsigned int)math_ceil(mode_lib->ms.pstate_vactive_det_fill_delay_us[plane_idx][dml2_pstate_type_uclk]); } void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index) diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h index 6d13d4c9b69a1..1087a8c926ff1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h @@ -593,7 +593,7 @@ struct dml2_core_internal_mode_support { double VActiveLatencyHidingMargin[DML2_MAX_PLANES]; double VActiveLatencyHidingUs[DML2_MAX_PLANES]; unsigned int MaxVStartupLines[DML2_MAX_PLANES]; - double dram_change_vactive_det_fill_delay_us[DML2_MAX_PLANES]; + double pstate_vactive_det_fill_delay_us[dml2_pstate_type_count][DML2_MAX_PLANES]; unsigned int num_mcaches_l[DML2_MAX_PLANES]; unsigned int mcache_row_bytes_l[DML2_MAX_PLANES]; @@ -623,8 +623,8 @@ struct dml2_core_internal_mode_support { unsigned int 
dpte_row_bytes_per_row_l[DML2_MAX_PLANES]; unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES]; - unsigned int pstate_bytes_required_l[DML2_MAX_PLANES]; - unsigned int pstate_bytes_required_c[DML2_MAX_PLANES]; + unsigned int pstate_bytes_required_l[dml2_pstate_type_count][DML2_MAX_PLANES]; + unsigned int pstate_bytes_required_c[dml2_pstate_type_count][DML2_MAX_PLANES]; unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; unsigned int cursor_bytes_per_line[DML2_MAX_PLANES]; @@ -1138,8 +1138,8 @@ struct dml2_core_calcs_mode_support_locals { unsigned int cursor_bytes[DML2_MAX_PLANES]; bool stream_visited[DML2_MAX_PLANES]; - unsigned int pstate_bytes_required_l[DML2_MAX_PLANES]; - unsigned int pstate_bytes_required_c[DML2_MAX_PLANES]; + unsigned int pstate_bytes_required_l[dml2_pstate_type_count][DML2_MAX_PLANES]; + unsigned int pstate_bytes_required_c[dml2_pstate_type_count][DML2_MAX_PLANES]; double prefetch_sw_bytes[DML2_MAX_PLANES]; double Tpre_rounded[DML2_MAX_PLANES]; @@ -1230,8 +1230,8 @@ struct dml2_core_calcs_mode_programming_locals { double Tr0_trips_flip_rounded[DML2_MAX_PLANES]; unsigned int per_pipe_flip_bytes[DML2_MAX_PLANES]; - unsigned int pstate_bytes_required_l[DML2_MAX_PLANES]; - unsigned int pstate_bytes_required_c[DML2_MAX_PLANES]; + unsigned int pstate_bytes_required_l[dml2_pstate_type_count][DML2_MAX_PLANES]; + unsigned int pstate_bytes_required_c[dml2_pstate_type_count][DML2_MAX_PLANES]; double prefetch_sw_bytes[DML2_MAX_PLANES]; double Tpre_rounded[DML2_MAX_PLANES]; @@ -2253,7 +2253,7 @@ struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params unsigned int *swath_width_c; unsigned int *swath_height_l; unsigned int *swath_height_c; - double latency_to_hide_us; + double latency_to_hide_us[DML2_MAX_PLANES]; /* outputs */ unsigned int *bytes_required_l; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c 
index abd210401fe22..c26e100fcaf2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1087,7 +1087,7 @@ static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo, /* check required stretch is allowed */ if (stream_descriptor->timing.drr_config.max_instant_vtotal_delta > 0 && - stream_pstate_meta->method_drr.stretched_vtotal - stream_pstate_meta->nom_vtotal > stream_descriptor->timing.drr_config.max_instant_vtotal_delta) { + stream_pstate_meta->method_drr.stretched_vtotal - stream_pstate_meta->nom_vtotal > (int)stream_descriptor->timing.drr_config.max_instant_vtotal_delta) { return false; } } @@ -1669,15 +1669,15 @@ static int get_vactive_pstate_margin(const struct display_configuation_with_meta return min_vactive_margin_us; } -static unsigned int get_vactive_det_fill_latency_delay_us(const struct display_configuation_with_meta *display_cfg, int plane_mask) +static int get_vactive_det_fill_latency_delay_us(const struct display_configuation_with_meta *display_cfg, int plane_mask) { unsigned char i; - unsigned int max_vactive_fill_us = 0; + int max_vactive_fill_us = 0; for (i = 0; i < DML2_MAX_PLANES; i++) { if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_vactive_det_fill_delay_us > max_vactive_fill_us) - max_vactive_fill_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_vactive_det_fill_delay_us; + if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].vactive_det_fill_delay_us[dml2_pstate_type_uclk] > max_vactive_fill_us) + max_vactive_fill_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].vactive_det_fill_delay_us[dml2_pstate_type_uclk]; } } @@ -2095,7 +2095,7 @@ static void setup_planes_for_vactive_by_mask(struct display_configuation_with_me 
display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vactive; if (!pmo->options->disable_vactive_det_fill_bw_pad) { - display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = + display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk] = (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); } } @@ -2116,7 +2116,7 @@ static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_wit display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vactive_drr; if (!pmo->options->disable_vactive_det_fill_bw_pad) { - display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = + display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk] = (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h index 9f562f0c47970..1a6c0727cd2af 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h @@ -152,7 +152,7 @@ struct core_plane_support_info { int active_latency_hiding_us; int mall_svp_size_requirement_ways; int nominal_vblank_pstate_latency_hiding_us; - unsigned int dram_change_vactive_det_fill_delay_us; + int vactive_det_fill_delay_us[dml2_pstate_type_count]; }; struct core_stream_support_info { @@ -209,6 +209,7 @@ struct dml2_core_mode_support_result { unsigned int uclk_pstate_supported; unsigned int fclk_pstate_supported; + struct dml2_core_internal_watermarks 
watermarks; } global; struct { @@ -257,8 +258,8 @@ struct dml2_implicit_svp_meta { struct dml2_pstate_per_method_common_meta { /* generic params */ - unsigned int allow_start_otg_vline; - unsigned int allow_end_otg_vline; + int allow_start_otg_vline; + int allow_end_otg_vline; /* scheduling params */ double allow_time_us; double disallow_time_us; @@ -268,39 +269,44 @@ struct dml2_pstate_per_method_common_meta { struct dml2_pstate_meta { bool valid; double otg_vline_time_us; - unsigned int scheduling_delay_otg_vlines; - unsigned int vertical_interrupt_ack_delay_otg_vlines; - unsigned int allow_to_target_delay_otg_vlines; - unsigned int contention_delay_otg_vlines; - unsigned int min_allow_width_otg_vlines; - unsigned int nom_vtotal; - unsigned int vblank_start; + int scheduling_delay_otg_vlines; + int vertical_interrupt_ack_delay_otg_vlines; + int allow_to_target_delay_otg_vlines; + int contention_delay_otg_vlines; + int min_allow_width_otg_vlines; + int nom_vtotal; + int vblank_start; double nom_refresh_rate_hz; double nom_frame_time_us; - unsigned int max_vtotal; + int max_vtotal; double min_refresh_rate_hz; double max_frame_time_us; - unsigned int blackout_otg_vlines; + int blackout_otg_vlines; + int max_allow_delay_otg_vlines; + double nom_vblank_time_us; struct { double max_vactive_det_fill_delay_us; - unsigned int max_vactive_det_fill_delay_otg_vlines; + double vactive_latency_hiding_us; + double reserved_vblank_required_us; + int max_vactive_det_fill_delay_otg_vlines; + int reserved_blank_required_vlines; struct dml2_pstate_per_method_common_meta common; } method_vactive; struct { struct dml2_pstate_per_method_common_meta common; } method_vblank; struct { - unsigned int programming_delay_otg_vlines; - unsigned int df_throttle_delay_otg_vlines; - unsigned int prefetch_to_mall_delay_otg_vlines; + int programming_delay_otg_vlines; + int df_throttle_delay_otg_vlines; + int prefetch_to_mall_delay_otg_vlines; unsigned long phantom_vactive; unsigned long 
phantom_vfp; unsigned long phantom_vtotal; struct dml2_pstate_per_method_common_meta common; } method_subvp; struct { - unsigned int programming_delay_otg_vlines; - unsigned int stretched_vtotal; + int programming_delay_otg_vlines; + int stretched_vtotal; struct dml2_pstate_per_method_common_meta common; } method_drr; };