--- /dev/null
+From aa956140b0a3c9d836aef216458ff3539ecc3e74 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Dec 2023 00:01:15 -0500
+Subject: drm/amd/display: Add dpia display mode validation logic
+
+From: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
+
+[ Upstream commit 59f1622a5f05d948a7c665a458a3dd76ba73015e ]
+
+[Why]
+If the bandwidth allocation feature is enabled, the connection manager
+won't limit the dp tunnel bandwidth, so display mode validation is
+needed for streams on dpia links to avoid oversubscribing the dp
+tunnel bandwidth.
+
+[How]
+- Read the non-reduced link rate and lane count and update the
+ reported link capability.
+- Calculate the bandwidth required for the streams of dpia links
+ per host router and validate it against the bandwidth allocated to
+ the host router.
+
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Reviewed-by: PeiChen Huang <peichen.huang@amd.com>
+Reviewed-by: Aric Cyr <aric.cyr@amd.com>
+Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
+Signed-off-by: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 0484e05d048b ("drm/amd/display: fixed integer types and null check locations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../drm/amd/display/dc/core/dc_link_exports.c | 2 +-
+ drivers/gpu/drm/amd/display/dc/dc.h | 4 +-
+ drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 6 +
+ drivers/gpu/drm/amd/display/dc/dc_types.h | 2 +
+ .../dc/link/protocols/link_dp_dpia_bw.c | 130 +++++++++++++-----
+ 5 files changed, 104 insertions(+), 40 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+index ed94187c2afa2..f365773d57148 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+@@ -497,7 +497,7 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable)
+ link->dc->link_srv->enable_hpd_filter(link, enable);
+ }
+
+-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
++bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
+ {
+ return dc->link_srv->validate_dpia_bandwidth(streams, count);
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
+index 3f33740e2f659..5f2eac868b747 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc.h
++++ b/drivers/gpu/drm/amd/display/dc/dc.h
+@@ -2116,11 +2116,11 @@ int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
+ *
+ * @dc: pointer to dc struct
+ * @stream: pointer to all possible streams
+- * @num_streams: number of valid DPIA streams
++ * @count: number of valid DPIA streams
+ *
+ * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE
+ */
+-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams,
++bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams,
+ const unsigned int count);
+
+ /* Sink Interfaces - A sink corresponds to a display output device */
+diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+index cfaa39c5dd16b..83719f5bea495 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
++++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+@@ -1433,6 +1433,12 @@ struct dp_trace {
+ #ifndef DP_TUNNELING_STATUS
+ #define DP_TUNNELING_STATUS 0xE0025 /* 1.4a */
+ #endif
++#ifndef DP_TUNNELING_MAX_LINK_RATE
++#define DP_TUNNELING_MAX_LINK_RATE 0xE0028 /* 1.4a */
++#endif
++#ifndef DP_TUNNELING_MAX_LANE_COUNT
++#define DP_TUNNELING_MAX_LANE_COUNT 0xE0029 /* 1.4a */
++#endif
+ #ifndef DPTX_BW_ALLOCATION_MODE_CONTROL
+ #define DPTX_BW_ALLOCATION_MODE_CONTROL 0xE0030 /* 1.4a */
+ #endif
+diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
+index accffba5a6834..19b7314811ae2 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
++++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
+@@ -1121,6 +1121,8 @@ struct dc_dpia_bw_alloc {
+ int bw_granularity; // BW Granularity
+ bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM
+ bool response_ready; // Response ready from the CM side
++ uint8_t nrd_max_lane_count; // Non-reduced max lane count
++ uint8_t nrd_max_link_rate; // Non-reduced max link rate
+ };
+
+ #define MAX_SINKS_PER_LINK 4
+diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+index d6e1f969bfd52..a7aa8c9da868f 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
++++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+@@ -59,6 +59,7 @@ static void reset_bw_alloc_struct(struct dc_link *link)
+ link->dpia_bw_alloc_config.estimated_bw = 0;
+ link->dpia_bw_alloc_config.bw_granularity = 0;
+ link->dpia_bw_alloc_config.response_ready = false;
++ link->dpia_bw_alloc_config.sink_allocated_bw = 0;
+ }
+
+ #define BW_GRANULARITY_0 4 // 0.25 Gbps
+@@ -104,6 +105,32 @@ static int get_estimated_bw(struct dc_link *link)
+ return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+ }
+
++static int get_non_reduced_max_link_rate(struct dc_link *link)
++{
++ uint8_t nrd_max_link_rate = 0;
++
++ core_link_read_dpcd(
++ link,
++ DP_TUNNELING_MAX_LINK_RATE,
++ &nrd_max_link_rate,
++ sizeof(uint8_t));
++
++ return nrd_max_link_rate;
++}
++
++static int get_non_reduced_max_lane_count(struct dc_link *link)
++{
++ uint8_t nrd_max_lane_count = 0;
++
++ core_link_read_dpcd(
++ link,
++ DP_TUNNELING_MAX_LANE_COUNT,
++ &nrd_max_lane_count,
++ sizeof(uint8_t));
++
++ return nrd_max_lane_count;
++}
++
+ /*
+ * Read all New BW alloc configuration ex: estimated_bw, allocated_bw,
+ * granuality, Driver_ID, CM_Group, & populate the BW allocation structs
+@@ -111,13 +138,20 @@ static int get_estimated_bw(struct dc_link *link)
+ */
+ static void init_usb4_bw_struct(struct dc_link *link)
+ {
+- // Init the known values
++ reset_bw_alloc_struct(link);
++
++ /* init the known values */
+ link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link);
+ link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link);
++ link->dpia_bw_alloc_config.nrd_max_link_rate = get_non_reduced_max_link_rate(link);
++ link->dpia_bw_alloc_config.nrd_max_lane_count = get_non_reduced_max_lane_count(link);
+
+ DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n",
+ __func__, link->dpia_bw_alloc_config.bw_granularity,
+ link->dpia_bw_alloc_config.estimated_bw);
++ DC_LOG_DEBUG("%s: nrd_max_link_rate(%d), nrd_max_lane_count(%d)\n",
++ __func__, link->dpia_bw_alloc_config.nrd_max_link_rate,
++ link->dpia_bw_alloc_config.nrd_max_lane_count);
+ }
+
+ static uint8_t get_lowest_dpia_index(struct dc_link *link)
+@@ -142,39 +176,50 @@ static uint8_t get_lowest_dpia_index(struct dc_link *link)
+ }
+
+ /*
+- * Get the Max Available BW or Max Estimated BW for each Host Router
++ * Get the maximum dp tunnel bandwidth of host router
+ *
+- * @link: pointer to the dc_link struct instance
+- * @type: ESTIMATD BW or MAX AVAILABLE BW
++ * @dc: pointer to the dc struct instance
++ * @hr_index: host router index
+ *
+- * return: response_ready flag from dc_link struct
++ * return: host router maximum dp tunnel bandwidth
+ */
+-static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
++static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_index)
+ {
+- const struct dc *dc_struct = link->dc;
+- uint8_t lowest_dpia_index = get_lowest_dpia_index(link);
+- uint8_t idx = (link->link_index - lowest_dpia_index) / 2, idx_temp = 0;
+- struct dc_link *link_temp;
++ uint8_t lowest_dpia_index = get_lowest_dpia_index(dc->links[0]);
++ uint8_t hr_index_temp = 0;
++ struct dc_link *link_dpia_primary, *link_dpia_secondary;
+ int total_bw = 0;
+- int i;
+-
+- for (i = 0; i < MAX_PIPES * 2; ++i) {
+
+- if (!dc_struct->links[i] || dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
+- continue;
++ for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) {
+
+- link_temp = dc_struct->links[i];
+- if (!link_temp || !link_temp->hpd_status)
++ if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
+ continue;
+
+- idx_temp = (link_temp->link_index - lowest_dpia_index) / 2;
+-
+- if (idx_temp == idx) {
+-
+- if (type == HOST_ROUTER_BW_ESTIMATED)
+- total_bw += link_temp->dpia_bw_alloc_config.estimated_bw;
+- else if (type == HOST_ROUTER_BW_ALLOCATED)
+- total_bw += link_temp->dpia_bw_alloc_config.sink_allocated_bw;
++ hr_index_temp = (dc->links[i]->link_index - lowest_dpia_index) / 2;
++
++ if (hr_index_temp == hr_index) {
++ link_dpia_primary = dc->links[i];
++ link_dpia_secondary = dc->links[i + 1];
++
++ /**
++ * If BW allocation enabled on both DPIAs, then
++ * HR BW = Estimated(dpia_primary) + Allocated(dpia_secondary)
++ * otherwise HR BW = Estimated(bw alloc enabled dpia)
++ */
++ if ((link_dpia_primary->hpd_status &&
++ link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) &&
++ (link_dpia_secondary->hpd_status &&
++ link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) {
++ total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
++ link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw;
++ } else if (link_dpia_primary->hpd_status &&
++ link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) {
++ total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw;
++ } else if (link_dpia_secondary->hpd_status &&
++ link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled) {
++ total_bw += link_dpia_secondary->dpia_bw_alloc_config.estimated_bw;
++ }
++ break;
+ }
+ }
+
+@@ -194,7 +239,6 @@ static void dpia_bw_alloc_unplug(struct dc_link *link)
+ if (link) {
+ DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n",
+ __func__, link->link_index);
+- link->dpia_bw_alloc_config.sink_allocated_bw = 0;
+ reset_bw_alloc_struct(link);
+ }
+ }
+@@ -397,7 +441,7 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
+ if (!timeout)
+ ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
+ else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
+- ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED);
++ ret = link->dpia_bw_alloc_config.sink_allocated_bw;
+ }
+ //2. Cold Unplug
+ else if (!link->hpd_status)
+@@ -439,29 +483,41 @@ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r
+ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias)
+ {
+ bool ret = true;
+- int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 };
+- uint8_t lowest_dpia_index = 0, dpia_index = 0;
+- uint8_t i;
++ int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }, host_router_total_dp_bw = 0;
++ uint8_t lowest_dpia_index, i, hr_index;
+
+ if (!num_dpias || num_dpias > MAX_DPIA_NUM)
+ return ret;
+
+- //Get total Host Router BW & Validate against each Host Router max BW
++ lowest_dpia_index = get_lowest_dpia_index(link[0]);
++
++ /* get total Host Router BW with granularity for the given modes */
+ for (i = 0; i < num_dpias; ++i) {
++ int granularity_Gbps = 0;
++ int bw_granularity = 0;
+
+ if (!link[i]->dpia_bw_alloc_config.bw_alloc_enabled)
+ continue;
+
+- lowest_dpia_index = get_lowest_dpia_index(link[i]);
+ if (link[i]->link_index < lowest_dpia_index)
+ continue;
+
+- dpia_index = (link[i]->link_index - lowest_dpia_index) / 2;
+- bw_needed_per_hr[dpia_index] += bw_needed_per_dpia[i];
+- if (bw_needed_per_hr[dpia_index] > get_host_router_total_bw(link[i], HOST_ROUTER_BW_ALLOCATED)) {
++ granularity_Gbps = (Kbps_TO_Gbps / link[i]->dpia_bw_alloc_config.bw_granularity);
++ bw_granularity = (bw_needed_per_dpia[i] / granularity_Gbps) * granularity_Gbps +
++ ((bw_needed_per_dpia[i] % granularity_Gbps) ? granularity_Gbps : 0);
+
+- ret = false;
+- break;
++ hr_index = (link[i]->link_index - lowest_dpia_index) / 2;
++ bw_needed_per_hr[hr_index] += bw_granularity;
++ }
++
++ /* validate against each Host Router max BW */
++ for (hr_index = 0; hr_index < MAX_HR_NUM; ++hr_index) {
++ if (bw_needed_per_hr[hr_index]) {
++ host_router_total_dp_bw = get_host_router_total_dp_tunnel_bw(link[0]->dc, hr_index);
++ if (bw_needed_per_hr[hr_index] > host_router_total_dp_bw) {
++ ret = false;
++ break;
++ }
+ }
+ }
+
+--
+2.43.0
+
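The round-up step in dpia_validate_usb4_bw() above is the least obvious part of the validation: each stream's required bandwidth is first rounded up to a whole connection-manager allocation unit (Kbps_TO_Gbps / bw_granularity) and only then summed per host router. The following is a stand-alone user-space sketch of just that step, not driver code; the Kbps_TO_Gbps value of 1000 * 1000 and the example numbers are assumptions for illustration.

#include <stdio.h>

#define Kbps_TO_Gbps (1000 * 1000)	/* assumed: 1 Gbps expressed in Kbps */

/* Round a stream's required bandwidth (Kbps) up to a whole CM allocation unit. */
static int round_up_to_granularity(int bw_needed_kbps, int bw_granularity)
{
	int unit_kbps = Kbps_TO_Gbps / bw_granularity;

	return (bw_needed_kbps / unit_kbps) * unit_kbps +
	       ((bw_needed_kbps % unit_kbps) ? unit_kbps : 0);
}

int main(void)
{
	/* bw_granularity == 4 corresponds to the 0.25 Gbps step (BW_GRANULARITY_0). */
	int needed = 2100000;	/* example: a 2.1 Gbps stream, in Kbps */

	printf("%d Kbps needed -> %d Kbps requested from the CM\n",
	       needed, round_up_to_granularity(needed, 4));
	return 0;
}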
--- /dev/null
+From 42d08dd1076db03268a7dc8d3e95523b099be90f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Jan 2024 16:40:37 -0500
+Subject: drm/amd/display: fixed integer types and null check locations
+
+From: Sohaib Nadeem <sohaib.nadeem@amd.com>
+
+[ Upstream commit 0484e05d048b66d01d1f3c1d2306010bb57d8738 ]
+
+[why]:
+issues fixed:
+- comparison with a wider integer type in the loop condition, which can
+  cause infinite loops
+- pointer dereference before the null check
+
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Josip Pavic <josip.pavic@amd.com>
+Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Signed-off-by: Sohaib Nadeem <sohaib.nadeem@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../gpu/drm/amd/display/dc/bios/bios_parser2.c | 16 ++++++++++------
+ .../drm/amd/display/dc/link/link_validation.c | 2 +-
+ 2 files changed, 11 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+index bbf2a465f400b..4c3c4c8de1cfc 100644
+--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+@@ -1860,19 +1860,21 @@ static enum bp_result get_firmware_info_v3_2(
+ /* Vega12 */
+ smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2,
+ DATA_TABLES(smu_info));
+- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
+ if (!smu_info_v3_2)
+ return BP_RESULT_BADBIOSTABLE;
+
++ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
++
+ info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10;
+ } else if (revision.minor == 3) {
+ /* Vega20 */
+ smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3,
+ DATA_TABLES(smu_info));
+- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
+ if (!smu_info_v3_3)
+ return BP_RESULT_BADBIOSTABLE;
+
++ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
++
+ info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10;
+ }
+
+@@ -2435,10 +2437,11 @@ static enum bp_result get_integrated_info_v11(
+ info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11,
+ DATA_TABLES(integratedsysteminfo));
+
+- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
+ if (info_v11 == NULL)
+ return BP_RESULT_BADBIOSTABLE;
+
++ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
++
+ info->gpu_cap_info =
+ le32_to_cpu(info_v11->gpucapinfo);
+ /*
+@@ -2650,11 +2653,12 @@ static enum bp_result get_integrated_info_v2_1(
+
+ info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1,
+ DATA_TABLES(integratedsysteminfo));
+- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
+
+ if (info_v2_1 == NULL)
+ return BP_RESULT_BADBIOSTABLE;
+
++ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
++
+ info->gpu_cap_info =
+ le32_to_cpu(info_v2_1->gpucapinfo);
+ /*
+@@ -2812,11 +2816,11 @@ static enum bp_result get_integrated_info_v2_2(
+ info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2,
+ DATA_TABLES(integratedsysteminfo));
+
+- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
+-
+ if (info_v2_2 == NULL)
+ return BP_RESULT_BADBIOSTABLE;
+
++ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
++
+ info->gpu_cap_info =
+ le32_to_cpu(info_v2_2->gpucapinfo);
+ /*
+diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+index 8fe66c3678508..5b0bc7f6a188c 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
++++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+@@ -361,7 +361,7 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un
+ struct dc_link *dpia_link[MAX_DPIA_NUM] = {0};
+ int num_dpias = 0;
+
+- for (uint8_t i = 0; i < num_streams; ++i) {
++ for (unsigned int i = 0; i < num_streams; ++i) {
+ if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) {
+ /* new dpia sst stream, check whether it exceeds max dpia */
+ if (num_dpias >= MAX_DPIA_NUM)
+--
+2.43.0
+
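The first issue is easy to reproduce outside the kernel: a uint8_t loop counter compared against a wider unsigned bound wraps around at 255 and can never reach a bound above that, which is why the loop in link_validate_dpia_bandwidth() now uses unsigned int. A minimal stand-alone sketch follows; the value 300 and the safety guard are only there to make the demo terminate and are not from the driver.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned int num_streams = 300;	/* wider than a uint8_t can represent */
	unsigned int iterations = 0;

	/* i wraps 255 -> 0, so "i < num_streams" never becomes false on its own. */
	for (uint8_t i = 0; i < num_streams; ++i) {
		if (++iterations > 1000)	/* guard so this demo terminates */
			break;
	}

	printf("loop body ran %u times for %u streams\n", iterations, num_streams);
	return 0;
}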
--- /dev/null
+From 351be241be9a81775f7181e1bcf3cd6eabeb70a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Dec 2023 23:16:34 +0800
+Subject: drm/amd/display: Request usb4 bw for mst streams
+
+From: Peichen Huang <peichen.huang@amd.com>
+
+[ Upstream commit 5f3bce13266e6fe2f7a46f94d8bc94d5274e276b ]
+
+[WHY]
+When usb4 bandwidth allocation mode is enabled, the driver needs to
+request bandwidth from the connection manager. For an mst link, the
+requested bandwidth should be big enough for all remote streams.
+
+[HOW]
+- For an mst link, the requested bandwidth should be the sum of all mst
+ stream bandwidths plus the dp MTPH overhead.
+- Allocate/deallocate usb4 bandwidth when setting dpms on/off.
+- When doing display mode validation, the driver also needs to consider
+ the total bandwidth of all mst streams for an mst link.
+
+Reviewed-by: Cruise Hung <cruise.hung@amd.com>
+Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
+Signed-off-by: Peichen Huang <peichen.huang@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 0484e05d048b ("drm/amd/display: fixed integer types and null check locations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/dc_types.h | 12 ++--
+ .../gpu/drm/amd/display/dc/link/link_dpms.c | 42 ++++++++++---
+ .../drm/amd/display/dc/link/link_validation.c | 60 +++++++++++++++----
+ .../dc/link/protocols/link_dp_dpia_bw.c | 59 +++++++++++++-----
+ .../dc/link/protocols/link_dp_dpia_bw.h | 9 +++
+ 5 files changed, 144 insertions(+), 38 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
+index 19b7314811ae2..cc173ecf78e0c 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
++++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
+@@ -1110,23 +1110,25 @@ struct dc_panel_config {
+ } ilr;
+ };
+
++#define MAX_SINKS_PER_LINK 4
++
+ /*
+ * USB4 DPIA BW ALLOCATION STRUCTS
+ */
+ struct dc_dpia_bw_alloc {
+- int sink_verified_bw; // The Verified BW that sink can allocated and use that has been verified already
+- int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated
+- int sink_max_bw; // The Max BW that sink can require/support
++ int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks
++ int link_verified_bw; // The Verified BW that link can allocated and use that has been verified already
++ int link_max_bw; // The Max BW that link can require/support
++ int allocated_bw; // The Actual Allocated BW for this DPIA
+ int estimated_bw; // The estimated available BW for this DPIA
+ int bw_granularity; // BW Granularity
++ int dp_overhead; // DP overhead in dp tunneling
+ bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM
+ bool response_ready; // Response ready from the CM side
+ uint8_t nrd_max_lane_count; // Non-reduced max lane count
+ uint8_t nrd_max_link_rate; // Non-reduced max link rate
+ };
+
+-#define MAX_SINKS_PER_LINK 4
+-
+ enum dc_hpd_enable_select {
+ HPD_EN_FOR_ALL_EDP = 0,
+ HPD_EN_FOR_PRIMARY_EDP_ONLY,
+diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+index b9768cd9b8a07..4901e27f678bc 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
++++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+@@ -2071,17 +2071,11 @@ static enum dc_status enable_link_dp(struct dc_state *state,
+ }
+ }
+
+- /*
+- * If the link is DP-over-USB4 do the following:
+- * - Train with fallback when enabling DPIA link. Conventional links are
++ /* Train with fallback when enabling DPIA link. Conventional links are
+ * trained with fallback during sink detection.
+- * - Allocate only what the stream needs for bw in Gbps. Inform the CM
+- * in case stream needs more or less bw from what has been allocated
+- * earlier at plug time.
+ */
+- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
++ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+ do_fallback = true;
+- }
+
+ /*
+ * Temporary w/a to get DP2.0 link rates to work with SST.
+@@ -2263,6 +2257,32 @@ static enum dc_status enable_link(
+ return status;
+ }
+
++static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, int bw)
++{
++ return true;
++}
++
++static bool allocate_usb4_bandwidth(struct dc_stream_state *stream)
++{
++ bool ret;
++
++ int bw = dc_bandwidth_in_kbps_from_timing(&stream->timing,
++ dc_link_get_highest_encoding_format(stream->sink->link));
++
++ ret = allocate_usb4_bandwidth_for_stream(stream, bw);
++
++ return ret;
++}
++
++static bool deallocate_usb4_bandwidth(struct dc_stream_state *stream)
++{
++ bool ret;
++
++ ret = allocate_usb4_bandwidth_for_stream(stream, 0);
++
++ return ret;
++}
++
+ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
+ {
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+@@ -2299,6 +2319,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
+ update_psp_stream_config(pipe_ctx, true);
+ dc->hwss.blank_stream(pipe_ctx);
+
++ if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
++ deallocate_usb4_bandwidth(pipe_ctx->stream);
++
+ if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+ deallocate_mst_payload(pipe_ctx);
+ else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
+@@ -2520,6 +2543,9 @@ void link_set_dpms_on(
+ }
+ }
+
++ if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
++ allocate_usb4_bandwidth(pipe_ctx->stream);
++
+ if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+ allocate_mst_payload(pipe_ctx);
+ else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
+diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+index b45fda96eaf64..8fe66c3678508 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
++++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+@@ -346,23 +346,61 @@ enum dc_status link_validate_mode_timing(
+ return DC_OK;
+ }
+
++/*
++ * This function calculates the bandwidth required for the stream timing
++ * and aggregates the stream bandwidth for the respective dpia link
++ *
++ * @stream: pointer to the dc_stream_state struct instance
++ * @num_streams: number of streams to be validated
++ *
++ * return: true if validation is succeeded
++ */
+ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams)
+ {
+- bool ret = true;
+- int bw_needed[MAX_DPIA_NUM];
+- struct dc_link *link[MAX_DPIA_NUM];
+-
+- if (!num_streams || num_streams > MAX_DPIA_NUM)
+- return ret;
++ int bw_needed[MAX_DPIA_NUM] = {0};
++ struct dc_link *dpia_link[MAX_DPIA_NUM] = {0};
++ int num_dpias = 0;
+
+ for (uint8_t i = 0; i < num_streams; ++i) {
++ if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) {
++ /* new dpia sst stream, check whether it exceeds max dpia */
++ if (num_dpias >= MAX_DPIA_NUM)
++ return false;
+
+- link[i] = stream[i].link;
+- bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
+- dc_link_get_highest_encoding_format(link[i]));
++ dpia_link[num_dpias] = stream[i].link;
++ bw_needed[num_dpias] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
++ dc_link_get_highest_encoding_format(dpia_link[num_dpias]));
++ num_dpias++;
++ } else if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
++ uint8_t j = 0;
++ /* check whether its a known dpia link */
++ for (; j < num_dpias; ++j) {
++ if (dpia_link[j] == stream[i].link)
++ break;
++ }
++
++ if (j == num_dpias) {
++ /* new dpia mst stream, check whether it exceeds max dpia */
++ if (num_dpias >= MAX_DPIA_NUM)
++ return false;
++ else {
++ dpia_link[j] = stream[i].link;
++ num_dpias++;
++ }
++ }
++
++ bw_needed[j] += dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
++ dc_link_get_highest_encoding_format(dpia_link[j]));
++ }
+ }
+
+- ret = dpia_validate_usb4_bw(link, bw_needed, num_streams);
++ /* Include dp overheads */
++ for (uint8_t i = 0; i < num_dpias; ++i) {
++ int dp_overhead = 0;
++
++ dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(dpia_link[i]);
++ bw_needed[i] += dp_overhead;
++ }
+
+- return ret;
++ return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias);
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+index a7aa8c9da868f..4ef1a6a1d1295 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
++++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+@@ -54,12 +54,18 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp)
+ static void reset_bw_alloc_struct(struct dc_link *link)
+ {
+ link->dpia_bw_alloc_config.bw_alloc_enabled = false;
+- link->dpia_bw_alloc_config.sink_verified_bw = 0;
+- link->dpia_bw_alloc_config.sink_max_bw = 0;
++ link->dpia_bw_alloc_config.link_verified_bw = 0;
++ link->dpia_bw_alloc_config.link_max_bw = 0;
++ link->dpia_bw_alloc_config.allocated_bw = 0;
+ link->dpia_bw_alloc_config.estimated_bw = 0;
+ link->dpia_bw_alloc_config.bw_granularity = 0;
++ link->dpia_bw_alloc_config.dp_overhead = 0;
+ link->dpia_bw_alloc_config.response_ready = false;
+- link->dpia_bw_alloc_config.sink_allocated_bw = 0;
++ link->dpia_bw_alloc_config.nrd_max_lane_count = 0;
++ link->dpia_bw_alloc_config.nrd_max_link_rate = 0;
++ for (int i = 0; i < MAX_SINKS_PER_LINK; i++)
++ link->dpia_bw_alloc_config.remote_sink_req_bw[i] = 0;
++ DC_LOG_DEBUG("reset usb4 bw alloc of link(%d)\n", link->link_index);
+ }
+
+ #define BW_GRANULARITY_0 4 // 0.25 Gbps
+@@ -210,8 +216,8 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in
+ link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) &&
+ (link_dpia_secondary->hpd_status &&
+ link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) {
+- total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
+- link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw;
++ total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
++ link_dpia_secondary->dpia_bw_alloc_config.allocated_bw;
+ } else if (link_dpia_primary->hpd_status &&
+ link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) {
+ total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw;
+@@ -264,7 +270,7 @@ static void set_usb4_req_bw_req(struct dc_link *link, int req_bw)
+
+ /* Error check whether requested and allocated are equal */
+ req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+- if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) {
++ if (req_bw == link->dpia_bw_alloc_config.allocated_bw) {
+ DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n",
+ __func__, link->link_index);
+ }
+@@ -387,9 +393,9 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
+ DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n",
+ __func__, link->link_index);
+ DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n",
+- __func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed);
++ __func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed);
+
+- link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed;
++ link->dpia_bw_alloc_config.allocated_bw = bw_needed;
+
+ link->dpia_bw_alloc_config.response_ready = true;
+ break;
+@@ -427,8 +433,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
+ if (link->hpd_status && peak_bw > 0) {
+
+ // If DP over USB4 then we need to check BW allocation
+- link->dpia_bw_alloc_config.sink_max_bw = peak_bw;
+- set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw);
++ link->dpia_bw_alloc_config.link_max_bw = peak_bw;
++ set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw);
+
+ do {
+ if (timeout > 0)
+@@ -440,8 +446,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
+
+ if (!timeout)
+ ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
+- else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
+- ret = link->dpia_bw_alloc_config.sink_allocated_bw;
++ else if (link->dpia_bw_alloc_config.allocated_bw > 0)
++ ret = link->dpia_bw_alloc_config.allocated_bw;
+ }
+ //2. Cold Unplug
+ else if (!link->hpd_status)
+@@ -450,7 +456,6 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
+ out:
+ return ret;
+ }
+-
+ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
+ {
+ bool ret = false;
+@@ -458,7 +463,7 @@ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r
+
+ DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n",
+ __func__, link->link_index, link->hpd_status,
+- link->dpia_bw_alloc_config.sink_allocated_bw, req_bw);
++ link->dpia_bw_alloc_config.allocated_bw, req_bw);
+
+ if (!get_bw_alloc_proceed_flag(link))
+ goto out;
+@@ -523,3 +528,29 @@ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const
+
+ return ret;
+ }
++
++int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link)
++{
++ int dp_overhead = 0, link_mst_overhead = 0;
++
++ if (!get_bw_alloc_proceed_flag((link)))
++ return dp_overhead;
++
++ /* if its mst link, add MTPH overhead */
++ if ((link->type == dc_connection_mst_branch) &&
++ !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
++ /* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH
++ * MST overhead is 1/64 of link bandwidth (excluding any overhead)
++ */
++ const struct dc_link_settings *link_cap =
++ dc_link_get_link_cap(link);
++ uint32_t link_bw_in_kbps =
++ link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8;
++ link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0);
++ }
++
++ /* add all the overheads */
++ dp_overhead = link_mst_overhead;
++
++ return dp_overhead;
++}
+diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+index 981bc4eb6120e..3b6d8494f9d5d 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
++++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+@@ -99,4 +99,13 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
+ */
+ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias);
+
++/*
++ * Obtain all the DP overheads in dp tunneling for the dpia link
++ *
++ * @link: pointer to the dc_link struct instance
++ *
++ * return: DP overheads in DP tunneling
++ */
++int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link);
++
+ #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */
+--
+2.43.0
+
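The MTPH overhead that link_dp_dpia_get_dp_overhead_in_dp_tunneling() adds is the only arithmetic in this patch worth spelling out: for an 8b/10b MST link, one of the 64 MTP time slots carries the MTPH, so roughly 1/64 of the link bandwidth (rounded up) is overhead. A stand-alone sketch of that calculation follows; the LINK_RATE_REF_FREQ_IN_KHZ value of 27000 and the HBR2 x4 example (link_rate 20, four lanes) are assumptions for illustration, not taken from this patch.

#include <stdint.h>
#include <stdio.h>

#define LINK_RATE_REF_FREQ_IN_KHZ 27000	/* assumed 27 MHz DP link-rate reference */

/* MTPH occupies slot 0 of a 64-slot MTP: overhead = ceil(link_bw / 64). */
static uint32_t mst_mtph_overhead_kbps(uint32_t link_rate, uint32_t lane_count)
{
	uint32_t link_bw_in_kbps =
		link_rate * lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8;

	return (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0);
}

int main(void)
{
	/* link_rate is in 0.27 Gbps-per-lane units, e.g. 20 for HBR2 (5.4 Gbps). */
	printf("MTPH overhead on HBR2 x4: %u Kbps\n", mst_mtph_overhead_kbps(20, 4));
	return 0;
}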
--- /dev/null
+From 62b7728c7f2cf8fbb7c4c0fd8611b66e5ec79647 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Dec 2023 13:47:22 +0100
+Subject: mptcp: add CurrEstab MIB counter support
+
+From: Geliang Tang <geliang.tang@linux.dev>
+
+[ Upstream commit d9cd27b8cd191133e287e5de107f971136abe8a2 ]
+
+Add a new MIB counter named MPTCP_MIB_CURRESTAB to count current
+established MPTCP connections, similar to TCP_MIB_CURRESTAB. This is
+useful to quickly list the number of MPTCP connections without having to
+iterate over all of them.
+
+This patch adds a new helper function mptcp_set_state(): if the state
+switches from or to ESTABLISHED state, this newly added counter is
+incremented. This helper is going to be used in the following patch.
+
+Similar to MPTCP_INC_STATS(), a new helper called MPTCP_DEC_STATS() is
+also needed to decrement a MIB counter.
+
+Signed-off-by: Geliang Tang <geliang.tang@linux.dev>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: e4a0fa47e816 ("mptcp: corner case locking for rx path fields initialization")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/mib.c | 1 +
+ net/mptcp/mib.h | 8 ++++++++
+ net/mptcp/protocol.c | 18 ++++++++++++++++++
+ net/mptcp/protocol.h | 1 +
+ 4 files changed, 28 insertions(+)
+
+diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
+index a0990c365a2ea..c30405e768337 100644
+--- a/net/mptcp/mib.c
++++ b/net/mptcp/mib.c
+@@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
+ SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED),
+ SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
+ SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
++ SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
+ SNMP_MIB_SENTINEL
+ };
+
+diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
+index cae71d9472529..dd7fd1f246b5f 100644
+--- a/net/mptcp/mib.h
++++ b/net/mptcp/mib.h
+@@ -65,6 +65,7 @@ enum linux_mptcp_mib_field {
+ * conflict with another subflow while updating msk rcv wnd
+ */
+ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
++ MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
+ __MPTCP_MIB_MAX
+ };
+
+@@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net,
+ __SNMP_INC_STATS(net->mib.mptcp_statistics, field);
+ }
+
++static inline void MPTCP_DEC_STATS(struct net *net,
++ enum linux_mptcp_mib_field field)
++{
++ if (likely(net->mib.mptcp_statistics))
++ SNMP_DEC_STATS(net->mib.mptcp_statistics, field);
++}
++
+ bool mptcp_mib_alloc(struct net *net);
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 9d4d5dbdbb53b..7765514451ded 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2877,6 +2877,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
+ release_sock(ssk);
+ }
+
++void mptcp_set_state(struct sock *sk, int state)
++{
++ int oldstate = sk->sk_state;
++
++ switch (state) {
++ case TCP_ESTABLISHED:
++ if (oldstate != TCP_ESTABLISHED)
++ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
++ break;
++
++ default:
++ if (oldstate == TCP_ESTABLISHED)
++ MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
++ }
++
++ inet_sk_state_store(sk, state);
++}
++
+ static const unsigned char new_state[16] = {
+ /* current state: new state: action: */
+ [0 /* (Invalid) */] = TCP_CLOSE,
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index 094d3fd47a92f..01778ffa86be1 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -642,6 +642,7 @@ bool __mptcp_close(struct sock *sk, long timeout);
+ void mptcp_cancel_work(struct sock *sk);
+ void __mptcp_unaccepted_force_close(struct sock *sk);
+ void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
++void mptcp_set_state(struct sock *sk, int state);
+
+ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
+ const struct mptcp_addr_info *b, bool use_port);
+--
+2.43.0
+
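The counting rule behind MPTCP_MIB_CURRESTAB is worth stating in isolation: the counter goes up only on a transition into TCP_ESTABLISHED and down only on a transition out of it, so at any moment it equals the number of established connections. Below is a minimal user-space model of that rule, not the kernel code; the state constants only mirror the TCP state numbering for readability.

#include <stdio.h>

enum { TCP_ESTABLISHED = 1, TCP_SYN_SENT = 2, TCP_FIN_WAIT1 = 4, TCP_CLOSE = 7 };

static int curr_estab;	/* stands in for the MPTCP_MIB_CURRESTAB counter */

static void set_state(int *sk_state, int state)
{
	int oldstate = *sk_state;

	if (state == TCP_ESTABLISHED && oldstate != TCP_ESTABLISHED)
		curr_estab++;			/* entering ESTABLISHED */
	else if (state != TCP_ESTABLISHED && oldstate == TCP_ESTABLISHED)
		curr_estab--;			/* leaving ESTABLISHED */

	*sk_state = state;
}

int main(void)
{
	int sk_state = TCP_SYN_SENT;

	set_state(&sk_state, TCP_ESTABLISHED);	/* connect completes: counter -> 1 */
	set_state(&sk_state, TCP_FIN_WAIT1);	/* close starts:      counter -> 0 */
	set_state(&sk_state, TCP_CLOSE);	/* no further change */
	printf("CurrEstab = %d\n", curr_estab);
	return 0;
}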
--- /dev/null
+From 9a619e8f8646e9b6c6061d04a4fa75017336a31c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 19:03:52 +0100
+Subject: mptcp: corner case locking for rx path fields initialization
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit e4a0fa47e816e186f6b4c0055d07eeec42d11871 ]
+
+Most MPTCP-level related fields are under the mptcp data lock
+protection, but are written one-off without such a lock at MPC
+completion time, both for the client and the server.
+
+Leverage the mptcp_propagate_state() infrastructure to move such
+initialization under the proper lock client-wise.
+
+The server side critical init steps are done by
+mptcp_subflow_fully_established(): ensure the caller properly holds the
+relevant lock, and avoid acquiring the same lock in nested scopes.
+
+There are no real potential races, as write access to such fields
+is implicitly serialized by the MPTCP state machine; the primary
+goal is consistency.
+
+Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/fastopen.c | 6 ++---
+ net/mptcp/options.c | 9 +++----
+ net/mptcp/protocol.c | 9 ++++---
+ net/mptcp/protocol.h | 9 +++----
+ net/mptcp/subflow.c | 56 +++++++++++++++++++++++++-------------------
+ 5 files changed, 50 insertions(+), 39 deletions(-)
+
+diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
+index 74698582a2859..ad28da655f8bc 100644
+--- a/net/mptcp/fastopen.c
++++ b/net/mptcp/fastopen.c
+@@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
+ mptcp_data_unlock(sk);
+ }
+
+-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+- const struct mptcp_options_received *mp_opt)
++void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
++ const struct mptcp_options_received *mp_opt)
+ {
+ struct sock *sk = (struct sock *)msk;
+ struct sk_buff *skb;
+
+- mptcp_data_lock(sk);
+ skb = skb_peek_tail(&sk->sk_receive_queue);
+ if (skb) {
+ WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
+@@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_
+ }
+
+ pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
+- mptcp_data_unlock(sk);
+ }
+diff --git a/net/mptcp/options.c b/net/mptcp/options.c
+index d2527d189a799..e3e96a49f9229 100644
+--- a/net/mptcp/options.c
++++ b/net/mptcp/options.c
+@@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
+ /* subflows are fully established as soon as we get any
+ * additional ack, including ADD_ADDR.
+ */
+- subflow->fully_established = 1;
+- WRITE_ONCE(msk->fully_established, true);
+- goto check_notify;
++ goto set_fully_established;
+ }
+
+ /* If the first established packet does not contain MP_CAPABLE + data
+@@ -986,7 +984,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
+ set_fully_established:
+ if (unlikely(!READ_ONCE(msk->pm.server_side)))
+ pr_warn_once("bogus mpc option on established client sk");
+- mptcp_subflow_fully_established(subflow, mp_opt);
++
++ mptcp_data_lock((struct sock *)msk);
++ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
++ mptcp_data_unlock((struct sock *)msk);
+
+ check_notify:
+ /* if the subflow is not already linked into the conn_list, we can't
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 55a90a7b7b517..d369274113108 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -3195,6 +3195,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+ {
+ struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+ struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
++ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk;
+
+ if (!nsk)
+@@ -3235,7 +3236,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+
+ /* The msk maintain a ref to each subflow in the connections list */
+ WRITE_ONCE(msk->first, ssk);
+- list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
++ subflow = mptcp_subflow_ctx(ssk);
++ list_add(&subflow->node, &msk->conn_list);
+ sock_hold(ssk);
+
+ /* new mpc subflow takes ownership of the newly
+@@ -3250,6 +3252,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+ __mptcp_propagate_sndbuf(nsk, ssk);
+
+ mptcp_rcv_space_init(msk, ssk);
++
++ if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
++ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
+ bh_unlock_sock(nsk);
+
+ /* note: the newly allocated socket refcount is 2 now */
+@@ -3525,8 +3530,6 @@ void mptcp_finish_connect(struct sock *ssk)
+ * accessing the field below
+ */
+ WRITE_ONCE(msk->local_key, subflow->local_key);
+- WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+- WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+
+ mptcp_pm_new_connection(msk, ssk, 0);
+ }
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index 01778ffa86be1..c9516882cdd4c 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -623,8 +623,9 @@ int mptcp_allow_join_id0(const struct net *net);
+ unsigned int mptcp_stale_loss_cnt(const struct net *net);
+ int mptcp_get_pm_type(const struct net *net);
+ const char *mptcp_get_scheduler(const struct net *net);
+-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+- const struct mptcp_options_received *mp_opt);
++void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
++ struct mptcp_subflow_context *subflow,
++ const struct mptcp_options_received *mp_opt);
+ bool __mptcp_retransmit_pending_data(struct sock *sk);
+ void mptcp_check_and_set_pending(struct sock *sk);
+ void __mptcp_push_pending(struct sock *sk, unsigned int flags);
+@@ -938,8 +939,8 @@ void mptcp_event_pm_listener(const struct sock *ssk,
+ enum mptcp_event_type event);
+ bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
+
+-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+- const struct mptcp_options_received *mp_opt);
++void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
++ const struct mptcp_options_received *mp_opt);
+ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
+ struct request_sock *req);
+
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index ba739e7009221..43d6ee4328141 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -441,20 +441,6 @@ void __mptcp_sync_state(struct sock *sk, int state)
+ }
+ }
+
+-static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
+-{
+- struct mptcp_sock *msk = mptcp_sk(sk);
+-
+- mptcp_data_lock(sk);
+- if (!sock_owned_by_user(sk)) {
+- __mptcp_sync_state(sk, ssk->sk_state);
+- } else {
+- msk->pending_state = ssk->sk_state;
+- __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+- }
+- mptcp_data_unlock(sk);
+-}
+-
+ static void subflow_set_remote_key(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+@@ -476,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
+ atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
+ }
+
++static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
++ struct mptcp_subflow_context *subflow,
++ const struct mptcp_options_received *mp_opt)
++{
++ struct mptcp_sock *msk = mptcp_sk(sk);
++
++ mptcp_data_lock(sk);
++ if (mp_opt) {
++ /* Options are available only in the non fallback cases
++ * avoid updating rx path fields otherwise
++ */
++ WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
++ WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
++ subflow_set_remote_key(msk, subflow, mp_opt);
++ }
++
++ if (!sock_owned_by_user(sk)) {
++ __mptcp_sync_state(sk, ssk->sk_state);
++ } else {
++ msk->pending_state = ssk->sk_state;
++ __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
++ }
++ mptcp_data_unlock(sk);
++}
++
+ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
+ {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+@@ -510,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
+ if (mp_opt.deny_join_id0)
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+ subflow->mp_capable = 1;
+- subflow_set_remote_key(msk, subflow, &mp_opt);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
+ mptcp_finish_connect(sk);
+- mptcp_propagate_state(parent, sk);
++ mptcp_propagate_state(parent, sk, subflow, &mp_opt);
+ } else if (subflow->request_join) {
+ u8 hmac[SHA256_DIGEST_SIZE];
+
+@@ -556,7 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
+ }
+ } else if (mptcp_check_fallback(sk)) {
+ fallback:
+- mptcp_propagate_state(parent, sk);
++ mptcp_propagate_state(parent, sk, subflow, NULL);
+ }
+ return;
+
+@@ -741,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
+ kfree_rcu(ctx, rcu);
+ }
+
+-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+- const struct mptcp_options_received *mp_opt)
++void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
++ struct mptcp_subflow_context *subflow,
++ const struct mptcp_options_received *mp_opt)
+ {
+- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+-
+ subflow_set_remote_key(msk, subflow, mp_opt);
+ subflow->fully_established = 1;
+ WRITE_ONCE(msk->fully_established, true);
+
+ if (subflow->is_mptfo)
+- mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
++ __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
+ }
+
+ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
+@@ -844,7 +853,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
+ * mpc option
+ */
+ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
+- mptcp_subflow_fully_established(ctx, &mp_opt);
+ mptcp_pm_fully_established(owner, child);
+ ctx->pm_notified = 1;
+ }
+@@ -1748,7 +1756,7 @@ static void subflow_state_change(struct sock *sk)
+ mptcp_do_fallback(sk);
+ pr_fallback(msk);
+ subflow->conn_finished = 1;
+- mptcp_propagate_state(parent, sk);
++ mptcp_propagate_state(parent, sk, subflow, NULL);
+ }
+
+ /* as recvmsg() does not acquire the subflow socket for ssk selection
+--
+2.43.0
+
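The renames above (mptcp_subflow_fully_established() -> __mptcp_subflow_fully_established(), mptcp_fastopen_gen_msk_ackseq() -> __mptcp_fastopen_gen_msk_ackseq()) follow the usual kernel convention: the double-underscore variant assumes the caller already holds the msk data lock, and the remaining lock-taking call site wraps it explicitly. A rough user-space sketch of that contract follows; the names and the pthread mutex are simplified stand-ins, not the real MPTCP symbols.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t msk_data_lock = PTHREAD_MUTEX_INITIALIZER;
static int fully_established;	/* stand-in for the rx-path fields written under the lock */

/* Caller must hold msk_data_lock; the "__" prefix marks that contract. */
static void __subflow_fully_established(void)
{
	fully_established = 1;
}

/* Lock-taking wrapper, mirroring what the one remaining call site does. */
static void subflow_fully_established(void)
{
	pthread_mutex_lock(&msk_data_lock);
	__subflow_fully_established();
	pthread_mutex_unlock(&msk_data_lock);
}

int main(void)
{
	subflow_fully_established();
	printf("fully_established = %d\n", fully_established);
	return 0;
}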
--- /dev/null
+From c4e9f078d87de7990431384ed15d7a148fd2cd85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 19:03:51 +0100
+Subject: mptcp: fix more tx path fields initialization
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 3f83d8a77eeeb47011b990fd766a421ee64f1d73 ]
+
+The 'msk->write_seq' and 'msk->snd_nxt' are always updated under
+the msk socket lock, except at MPC handshake completion time.
+
+Builds on the previous commit to move such init under the relevant
+lock.
+
+There are no known problems caused by the potential race, the
+primary goal is consistency.
+
+Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: e4a0fa47e816 ("mptcp: corner case locking for rx path fields initialization")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 6 ++----
+ net/mptcp/subflow.c | 13 +++++++++++--
+ 2 files changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 2f794924ae5d4..55a90a7b7b517 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -3525,10 +3525,8 @@ void mptcp_finish_connect(struct sock *ssk)
+ * accessing the field below
+ */
+ WRITE_ONCE(msk->local_key, subflow->local_key);
+- WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+- WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+- WRITE_ONCE(msk->snd_una, msk->write_seq);
+- WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
++ WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
++ WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+
+ mptcp_pm_new_connection(msk, ssk, 0);
+ }
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 15f456fb28977..ba739e7009221 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -421,12 +421,21 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
+
+ void __mptcp_sync_state(struct sock *sk, int state)
+ {
++ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
++ struct sock *ssk = msk->first;
+
+- __mptcp_propagate_sndbuf(sk, msk->first);
++ subflow = mptcp_subflow_ctx(ssk);
++ __mptcp_propagate_sndbuf(sk, ssk);
+ if (!msk->rcvspace_init)
+- mptcp_rcv_space_init(msk, msk->first);
++ mptcp_rcv_space_init(msk, ssk);
++
+ if (sk->sk_state == TCP_SYN_SENT) {
++	/* subflow->idsn is always available in TCP_SYN_SENT state,
++ * even for the FASTOPEN scenarios
++ */
++ WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
++ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+ mptcp_set_state(sk, state);
+ sk->sk_state_change(sk);
+ }
+--
+2.43.0
+
--- /dev/null
+From 8cf052a72d794c3afa841e0261d58273ee6a83d1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Dec 2023 13:47:23 +0100
+Subject: mptcp: use mptcp_set_state
+
+From: Geliang Tang <geliang.tang@linux.dev>
+
+[ Upstream commit c693a8516429908da3ea111b0caa3c042ab1e6e9 ]
+
+This patch replaces all the 'inet_sk_state_store()' calls under net/mptcp
+with the new helper mptcp_set_state().
+
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/460
+Signed-off-by: Geliang Tang <geliang.tang@linux.dev>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: e4a0fa47e816 ("mptcp: corner case locking for rx path fields initialization")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c | 5 +++++
+ net/mptcp/protocol.c | 38 +++++++++++++++++++-------------------
+ net/mptcp/subflow.c | 2 +-
+ 3 files changed, 25 insertions(+), 20 deletions(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 3011bc378462b..44c0e96210a46 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -1048,6 +1048,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
+ if (err)
+ return err;
+
++ /* We don't use mptcp_set_state() here because it needs to be called
++ * under the msk socket lock. For the moment, that will not bring
++ * anything more than only calling inet_sk_state_store(), because the
++ * old status is known (TCP_CLOSE).
++ */
+ inet_sk_state_store(newsk, TCP_LISTEN);
+ lock_sock(ssk);
+ err = __inet_listen_sk(ssk, backlog);
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 7765514451ded..2f794924ae5d4 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -445,11 +445,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
+
+ switch (sk->sk_state) {
+ case TCP_FIN_WAIT1:
+- inet_sk_state_store(sk, TCP_FIN_WAIT2);
++ mptcp_set_state(sk, TCP_FIN_WAIT2);
+ break;
+ case TCP_CLOSING:
+ case TCP_LAST_ACK:
+- inet_sk_state_store(sk, TCP_CLOSE);
++ mptcp_set_state(sk, TCP_CLOSE);
+ break;
+ }
+
+@@ -610,13 +610,13 @@ static bool mptcp_check_data_fin(struct sock *sk)
+
+ switch (sk->sk_state) {
+ case TCP_ESTABLISHED:
+- inet_sk_state_store(sk, TCP_CLOSE_WAIT);
++ mptcp_set_state(sk, TCP_CLOSE_WAIT);
+ break;
+ case TCP_FIN_WAIT1:
+- inet_sk_state_store(sk, TCP_CLOSING);
++ mptcp_set_state(sk, TCP_CLOSING);
+ break;
+ case TCP_FIN_WAIT2:
+- inet_sk_state_store(sk, TCP_CLOSE);
++ mptcp_set_state(sk, TCP_CLOSE);
+ break;
+ default:
+ /* Other states not expected */
+@@ -791,7 +791,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+- inet_sk_state_store(sk, ssk_state);
++ mptcp_set_state(sk, ssk_state);
+ WRITE_ONCE(sk->sk_err, -err);
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+@@ -2470,7 +2470,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+ inet_sk_state_load(msk->first) == TCP_CLOSE) {
+ if (sk->sk_state != TCP_ESTABLISHED ||
+ msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
+- inet_sk_state_store(sk, TCP_CLOSE);
++ mptcp_set_state(sk, TCP_CLOSE);
+ mptcp_close_wake_up(sk);
+ } else {
+ mptcp_start_tout_timer(sk);
+@@ -2565,7 +2565,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
+ }
+
+- inet_sk_state_store(sk, TCP_CLOSE);
++ mptcp_set_state(sk, TCP_CLOSE);
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
+ smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
+ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
+@@ -2700,7 +2700,7 @@ static void mptcp_do_fastclose(struct sock *sk)
+ struct mptcp_subflow_context *subflow, *tmp;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+- inet_sk_state_store(sk, TCP_CLOSE);
++ mptcp_set_state(sk, TCP_CLOSE);
+ mptcp_for_each_subflow_safe(msk, subflow, tmp)
+ __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
+ subflow, MPTCP_CF_FASTCLOSE);
+@@ -2917,7 +2917,7 @@ static int mptcp_close_state(struct sock *sk)
+ int next = (int)new_state[sk->sk_state];
+ int ns = next & TCP_STATE_MASK;
+
+- inet_sk_state_store(sk, ns);
++ mptcp_set_state(sk, ns);
+
+ return next & TCP_ACTION_FIN;
+ }
+@@ -3035,7 +3035,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
+
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
+ mptcp_check_listen_stop(sk);
+- inet_sk_state_store(sk, TCP_CLOSE);
++ mptcp_set_state(sk, TCP_CLOSE);
+ goto cleanup;
+ }
+
+@@ -3078,7 +3078,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
+ * state, let's not keep resources busy for no reasons
+ */
+ if (subflows_alive == 0)
+- inet_sk_state_store(sk, TCP_CLOSE);
++ mptcp_set_state(sk, TCP_CLOSE);
+
+ sock_hold(sk);
+ pr_debug("msk=%p state=%d", sk, sk->sk_state);
+@@ -3144,7 +3144,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
+ return -EBUSY;
+
+ mptcp_check_listen_stop(sk);
+- inet_sk_state_store(sk, TCP_CLOSE);
++ mptcp_set_state(sk, TCP_CLOSE);
+
+ mptcp_stop_rtx_timer(sk);
+ mptcp_stop_tout_timer(sk);
+@@ -3231,7 +3231,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+ /* this can't race with mptcp_close(), as the msk is
+ * not yet exposted to user-space
+ */
+- inet_sk_state_store(nsk, TCP_ESTABLISHED);
++ mptcp_set_state(nsk, TCP_ESTABLISHED);
+
+ /* The msk maintain a ref to each subflow in the connections list */
+ WRITE_ONCE(msk->first, ssk);
+@@ -3686,7 +3686,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
+
+- inet_sk_state_store(sk, TCP_SYN_SENT);
++ mptcp_set_state(sk, TCP_SYN_SENT);
+ subflow = mptcp_subflow_ctx(ssk);
+ #ifdef CONFIG_TCP_MD5SIG
+ /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
+@@ -3736,7 +3736,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+ if (unlikely(err)) {
+ /* avoid leaving a dangling token in an unconnected socket */
+ mptcp_token_destroy(msk);
+- inet_sk_state_store(sk, TCP_CLOSE);
++ mptcp_set_state(sk, TCP_CLOSE);
+ return err;
+ }
+
+@@ -3826,13 +3826,13 @@ static int mptcp_listen(struct socket *sock, int backlog)
+ goto unlock;
+ }
+
+- inet_sk_state_store(sk, TCP_LISTEN);
++ mptcp_set_state(sk, TCP_LISTEN);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+
+ lock_sock(ssk);
+ err = __inet_listen_sk(ssk, backlog);
+ release_sock(ssk);
+- inet_sk_state_store(sk, inet_sk_state_load(ssk));
++ mptcp_set_state(sk, inet_sk_state_load(ssk));
+
+ if (!err) {
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+@@ -3892,7 +3892,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
+ __mptcp_close_ssk(newsk, msk->first,
+ mptcp_subflow_ctx(msk->first), 0);
+ if (unlikely(list_is_singular(&msk->conn_list)))
+- inet_sk_state_store(newsk, TCP_CLOSE);
++ mptcp_set_state(newsk, TCP_CLOSE);
+ }
+ }
+ release_sock(newsk);
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 8c7e22a9a37bd..15f456fb28977 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -427,7 +427,7 @@ void __mptcp_sync_state(struct sock *sk, int state)
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, msk->first);
+ if (sk->sk_state == TCP_SYN_SENT) {
+- inet_sk_state_store(sk, state);
++ mptcp_set_state(sk, state);
+ sk->sk_state_change(sk);
+ }
+ }
+--
+2.43.0
+
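The hunks above mechanically switch the MPTCP socket-state writes from inet_sk_state_store() to mptcp_set_state(). Judging from mptcp-add-currestab-mib-counter-support.patch earlier in this series, the wrapper keeps the CURRESTAB MIB counter in step with the socket state; the sketch below only illustrates that idea and is not the exact helper carried by that patch.

void mptcp_set_state(struct sock *sk, int state)
{
	int oldstate = sk->sk_state;

	switch (state) {
	case TCP_ESTABLISHED:
		if (oldstate != TCP_ESTABLISHED)
			/* entering ESTABLISHED: bump the currently-established count */
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
		break;
	default:
		if (oldstate == TCP_ESTABLISHED)
			/* leaving ESTABLISHED: drop the currently-established count */
			MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
		break;
	}

	/* the actual state change still goes through the inet helper */
	inet_sk_state_store(sk, state);
}
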
cifs-change-tcon-status-when-need_reconnect-is-set-o.patch
cifs-handle-cases-where-multiple-sessions-share-conn.patch
smb3-clarify-mount-warning.patch
+mptcp-add-currestab-mib-counter-support.patch
+mptcp-use-mptcp_set_state.patch
+mptcp-fix-more-tx-path-fields-initialization.patch
+mptcp-corner-case-locking-for-rx-path-fields-initial.patch
+drm-amd-display-add-dpia-display-mode-validation-log.patch
+drm-amd-display-request-usb4-bw-for-mst-streams.patch
+drm-amd-display-fixed-integer-types-and-null-check-l.patch
+xen-evtchn-allow-shared-registration-of-irq-handers.patch
+xen-events-reduce-externally-visible-helper-function.patch
+xen-events-remove-some-simple-helpers-from-events_ba.patch
+xen-events-drop-xen_allocate_irqs_dynamic.patch
+xen-events-modify-internal-un-bind-interfaces.patch
+xen-events-close-evtchn-after-mapping-cleanup.patch
--- /dev/null
+From fe5396afd11d8a5bab88de130fd203974520e953 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jan 2024 16:31:28 +0000
+Subject: xen/events: close evtchn after mapping cleanup
+
+From: Maximilian Heyne <mheyne@amazon.de>
+
+[ Upstream commit fa765c4b4aed2d64266b694520ecb025c862c5a9 ]
+
+shutdown_pirq and startup_pirq do not take the
+irq_mapping_update_lock because they cannot, due to lock inversion: both
+are called with the irq_desc->lock already held, whereas the correct
+lock order is first irq_mapping_update_lock and then irq_desc->lock.
+
+This opens multiple races:
+- shutdown_pirq can be interrupted by a function that allocates an event
+ channel:
+
+ CPU0 CPU1
+ shutdown_pirq {
+ xen_evtchn_close(e)
+ __startup_pirq {
+ EVTCHNOP_bind_pirq
+ -> returns just freed evtchn e
+ set_evtchn_to_irq(e, irq)
+ }
+ xen_irq_info_cleanup() {
+ set_evtchn_to_irq(e, -1)
+ }
+ }
+
+ Assume here that event channel e refers to the same event channel
+ number.
+ After this race the evtchn_to_irq mapping for e is invalid (-1).
+
+- __startup_pirq races with __unbind_from_irq in a similar way. Because
+ __startup_pirq doesn't take irq_mapping_update_lock it can grab the
+ evtchn that __unbind_from_irq is currently freeing and cleaning up. In
+ this case even though the event channel is allocated, its mapping can
+ be unset in evtchn_to_irq.
+
+The fix is to first clean up the mappings and then close the event
+channel. In this way, when an event channel gets allocated, its
+potential previous evtchn_to_irq mappings are guaranteed to be unset
+already. This is also the reverse of the allocation order, where first
+the event channel is allocated and then the mappings are set up.
+
+On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
+[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
+devices. The issue is that during nvme_setup_io_queues, pci_free_irq
+is called for every device, which results in a call to shutdown_pirq.
+With many NVMe devices it is therefore likely to hit this race during
+boot, because multiple calls to shutdown_pirq and startup_pirq are
+potentially running in parallel.
+
+ ------------[ cut here ]------------
+ blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
+ kernel BUG at drivers/xen/events/events_base.c:499!
+ invalid opcode: 0000 [#1] SMP PTI
+ CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
+ Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
+ Workqueue: nvme-reset-wq nvme_reset_work
+ RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
+ Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
+ RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
+ RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
+ RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
+ RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
+ R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
+ R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
+ FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ ? show_trace_log_lvl+0x1c1/0x2d9
+ ? show_trace_log_lvl+0x1c1/0x2d9
+ ? set_affinity_irq+0xdc/0x1c0
+ ? __die_body.cold+0x8/0xd
+ ? die+0x2b/0x50
+ ? do_trap+0x90/0x110
+ ? bind_evtchn_to_cpu+0xdf/0xf0
+ ? do_error_trap+0x65/0x80
+ ? bind_evtchn_to_cpu+0xdf/0xf0
+ ? exc_invalid_op+0x4e/0x70
+ ? bind_evtchn_to_cpu+0xdf/0xf0
+ ? asm_exc_invalid_op+0x12/0x20
+ ? bind_evtchn_to_cpu+0xdf/0xf0
+ ? bind_evtchn_to_cpu+0xc5/0xf0
+ set_affinity_irq+0xdc/0x1c0
+ irq_do_set_affinity+0x1d7/0x1f0
+ irq_setup_affinity+0xd6/0x1a0
+ irq_startup+0x8a/0xf0
+ __setup_irq+0x639/0x6d0
+ ? nvme_suspend+0x150/0x150
+ request_threaded_irq+0x10c/0x180
+ ? nvme_suspend+0x150/0x150
+ pci_request_irq+0xa8/0xf0
+ ? __blk_mq_free_request+0x74/0xa0
+ queue_request_irq+0x6f/0x80
+ nvme_create_queue+0x1af/0x200
+ nvme_create_io_queues+0xbd/0xf0
+ nvme_setup_io_queues+0x246/0x320
+ ? nvme_irq_check+0x30/0x30
+ nvme_reset_work+0x1c8/0x400
+ process_one_work+0x1b0/0x350
+ worker_thread+0x49/0x310
+ ? process_one_work+0x350/0x350
+ kthread+0x11b/0x140
+ ? __kthread_bind_mask+0x60/0x60
+ ret_from_fork+0x22/0x30
+ Modules linked in:
+ ---[ end trace a11715de1eee1873 ]---
+
+Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
+Cc: stable@vger.kernel.org
+Co-debugged-by: Andrew Panyakin <apanyaki@amazon.com>
+Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_base.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index 6f57ef78f5507..36ba3ef6ef01e 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -908,8 +908,8 @@ static void shutdown_pirq(struct irq_data *data)
+ return;
+
+ do_mask(info, EVT_MASK_REASON_EXPLICIT);
+- xen_evtchn_close(evtchn);
+ xen_irq_info_cleanup(info);
++ xen_evtchn_close(evtchn);
+ }
+
+ static void enable_pirq(struct irq_data *data)
+@@ -941,6 +941,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
+ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
+ {
+ evtchn_port_t evtchn;
++ bool close_evtchn = false;
+
+ if (!info) {
+ xen_irq_free_desc(irq);
+@@ -960,7 +961,7 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
+ struct xenbus_device *dev;
+
+ if (!info->is_static)
+- xen_evtchn_close(evtchn);
++ close_evtchn = true;
+
+ switch (info->type) {
+ case IRQT_VIRQ:
+@@ -980,6 +981,9 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
+ }
+
+ xen_irq_info_cleanup(info);
++
++ if (close_evtchn)
++ xen_evtchn_close(evtchn);
+ }
+
+ xen_free_irq(info);
+--
+2.43.0
+
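Restating the ordering rule the patch enforces, with the names taken from the diff above (the wrapper function itself is purely illustrative): teardown has to mirror allocation in reverse, unsetting the evtchn_to_irq mapping before the port is handed back to Xen.

/* Allocation:   evtchn obtained from Xen, then set_evtchn_to_irq(evtchn, irq).
 * Teardown:     set_evtchn_to_irq(evtchn, -1) first, only then close the port.
 * Closing first would let a racing allocator receive the same port while the
 * stale evtchn -> irq mapping is still visible.
 */
static void example_pirq_teardown(struct irq_info *info, evtchn_port_t evtchn)
{
	xen_irq_info_cleanup(info);	/* invalidate the mapping first */
	xen_evtchn_close(evtchn);	/* then return the event channel to Xen */
}
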
--- /dev/null
+From 1c37d9d8b5ba3045652ec98bccca52987c81bf5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Sep 2023 10:29:02 +0200
+Subject: xen/events: drop xen_allocate_irqs_dynamic()
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 5dd9ad32d7758b1a76742f394acf0eb3ac8a636a ]
+
+Instead of having a common function for allocating a single IRQ or a
+consecutive number of IRQs, split up the functionality into the callers
+of xen_allocate_irqs_dynamic().
+
+This allows any allocation error in xen_irq_init() to be handled
+gracefully instead of panicking the system. Let xen_irq_init() return
+the irq_info pointer or NULL in case of an allocation error.
+
+Additionally store the IRQ number in irq_info already at allocation
+time, as otherwise it would read as '0' (which is a valid IRQ number)
+until being set.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_base.c | 74 +++++++++++++++++++-------------
+ 1 file changed, 44 insertions(+), 30 deletions(-)
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index d3d7501628381..4dfd68382465b 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -304,6 +304,13 @@ static void channels_on_cpu_inc(struct irq_info *info)
+ info->is_accounted = 1;
+ }
+
++static void xen_irq_free_desc(unsigned int irq)
++{
++ /* Legacy IRQ descriptors are managed by the arch. */
++ if (irq >= nr_legacy_irqs())
++ irq_free_desc(irq);
++}
++
+ static void delayed_free_irq(struct work_struct *work)
+ {
+ struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
+@@ -315,9 +322,7 @@ static void delayed_free_irq(struct work_struct *work)
+
+ kfree(info);
+
+- /* Legacy IRQ descriptors are managed by the arch. */
+- if (irq >= nr_legacy_irqs())
+- irq_free_desc(irq);
++ xen_irq_free_desc(irq);
+ }
+
+ /* Constructors for packed IRQ information. */
+@@ -332,7 +337,6 @@ static int xen_irq_info_common_setup(struct irq_info *info,
+ BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
+
+ info->type = type;
+- info->irq = irq;
+ info->evtchn = evtchn;
+ info->cpu = cpu;
+ info->mask_reason = EVT_MASK_REASON_EXPLICIT;
+@@ -733,47 +737,45 @@ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
+ }
+ EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
+
+-static void xen_irq_init(unsigned irq)
++static struct irq_info *xen_irq_init(unsigned int irq)
+ {
+ struct irq_info *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+- if (info == NULL)
+- panic("Unable to allocate metadata for IRQ%d\n", irq);
++ if (info) {
++ info->irq = irq;
++ info->type = IRQT_UNBOUND;
++ info->refcnt = -1;
++ INIT_RCU_WORK(&info->rwork, delayed_free_irq);
+
+- info->type = IRQT_UNBOUND;
+- info->refcnt = -1;
+- INIT_RCU_WORK(&info->rwork, delayed_free_irq);
++ set_info_for_irq(irq, info);
++ /*
++ * Interrupt affinity setting can be immediate. No point
++ * in delaying it until an interrupt is handled.
++ */
++ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+
+- set_info_for_irq(irq, info);
+- /*
+- * Interrupt affinity setting can be immediate. No point
+- * in delaying it until an interrupt is handled.
+- */
+- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
++ INIT_LIST_HEAD(&info->eoi_list);
++ list_add_tail(&info->list, &xen_irq_list_head);
++ }
+
+- INIT_LIST_HEAD(&info->eoi_list);
+- list_add_tail(&info->list, &xen_irq_list_head);
++ return info;
+ }
+
+-static int __must_check xen_allocate_irqs_dynamic(int nvec)
++static inline int __must_check xen_allocate_irq_dynamic(void)
+ {
+- int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
++ int irq = irq_alloc_desc_from(0, -1);
+
+ if (irq >= 0) {
+- for (i = 0; i < nvec; i++)
+- xen_irq_init(irq + i);
++ if (!xen_irq_init(irq)) {
++ xen_irq_free_desc(irq);
++ irq = -1;
++ }
+ }
+
+ return irq;
+ }
+
+-static inline int __must_check xen_allocate_irq_dynamic(void)
+-{
+-
+- return xen_allocate_irqs_dynamic(1);
+-}
+-
+ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
+ {
+ int irq;
+@@ -793,7 +795,10 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
+ else
+ irq = irq_alloc_desc_at(gsi, -1);
+
+- xen_irq_init(irq);
++ if (!xen_irq_init(irq)) {
++ xen_irq_free_desc(irq);
++ irq = -1;
++ }
+
+ return irq;
+ }
+@@ -963,6 +968,11 @@ static void __unbind_from_irq(unsigned int irq)
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
+ struct irq_info *info = info_for_irq(irq);
+
++ if (!info) {
++ xen_irq_free_desc(irq);
++ return;
++ }
++
+ if (info->refcnt > 0) {
+ info->refcnt--;
+ if (info->refcnt != 0)
+@@ -1101,11 +1111,14 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+
+ mutex_lock(&irq_mapping_update_lock);
+
+- irq = xen_allocate_irqs_dynamic(nvec);
++ irq = irq_alloc_descs(-1, 0, nvec, -1);
+ if (irq < 0)
+ goto out;
+
+ for (i = 0; i < nvec; i++) {
++ if (!xen_irq_init(irq + i))
++ goto error_irq;
++
+ irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
+
+ ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
+@@ -1753,6 +1766,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
+ so there should be a proper type */
+ BUG_ON(info->type == IRQT_UNBOUND);
+
++ info->irq = irq;
+ (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
+
+ mutex_unlock(&irq_mapping_update_lock);
+--
+2.43.0
+
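The error-handling pattern introduced here can be read directly off the xen_allocate_irq_dynamic() hunk above; repeated as a standalone snippet, with no code beyond what the diff already contains:

static inline int __must_check xen_allocate_irq_dynamic(void)
{
	int irq = irq_alloc_desc_from(0, -1);	/* may return a negative errno */

	if (irq >= 0) {
		if (!xen_irq_init(irq)) {	/* irq_info allocation failed */
			xen_irq_free_desc(irq);	/* roll back the descriptor */
			irq = -1;		/* fail gracefully, no panic */
		}
	}

	return irq;
}
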
--- /dev/null
+From c14a0c2a671788360660b60d5becf2ca1ac54c9a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Sep 2023 09:09:52 +0200
+Subject: xen/events: modify internal [un]bind interfaces
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 3fcdaf3d7634338c3f5cbfa7451eb0b6b0024844 ]
+
+Modify the internal bind and unbind interfaces to take a struct
+irq_info parameter. When allocating a new IRQ, pass the pointer up
+from the allocating function.
+
+This will reduce the number of info_for_irq() calls and make the code
+more efficient.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_base.c | 259 +++++++++++++++----------------
+ 1 file changed, 124 insertions(+), 135 deletions(-)
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index 4dfd68382465b..6f57ef78f5507 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -327,7 +327,6 @@ static void delayed_free_irq(struct work_struct *work)
+
+ /* Constructors for packed IRQ information. */
+ static int xen_irq_info_common_setup(struct irq_info *info,
+- unsigned irq,
+ enum xen_irq_type type,
+ evtchn_port_t evtchn,
+ unsigned short cpu)
+@@ -342,23 +341,22 @@ static int xen_irq_info_common_setup(struct irq_info *info,
+ info->mask_reason = EVT_MASK_REASON_EXPLICIT;
+ raw_spin_lock_init(&info->lock);
+
+- ret = set_evtchn_to_irq(evtchn, irq);
++ ret = set_evtchn_to_irq(evtchn, info->irq);
+ if (ret < 0)
+ return ret;
+
+- irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
++ irq_clear_status_flags(info->irq, IRQ_NOREQUEST | IRQ_NOAUTOEN);
+
+ return xen_evtchn_port_setup(evtchn);
+ }
+
+-static int xen_irq_info_evtchn_setup(unsigned irq,
++static int xen_irq_info_evtchn_setup(struct irq_info *info,
+ evtchn_port_t evtchn,
+ struct xenbus_device *dev)
+ {
+- struct irq_info *info = info_for_irq(irq);
+ int ret;
+
+- ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
++ ret = xen_irq_info_common_setup(info, IRQT_EVTCHN, evtchn, 0);
+ info->u.interdomain = dev;
+ if (dev)
+ atomic_inc(&dev->event_channels);
+@@ -366,50 +364,37 @@ static int xen_irq_info_evtchn_setup(unsigned irq,
+ return ret;
+ }
+
+-static int xen_irq_info_ipi_setup(unsigned cpu,
+- unsigned irq,
+- evtchn_port_t evtchn,
+- enum ipi_vector ipi)
++static int xen_irq_info_ipi_setup(struct irq_info *info, unsigned int cpu,
++ evtchn_port_t evtchn, enum ipi_vector ipi)
+ {
+- struct irq_info *info = info_for_irq(irq);
+-
+ info->u.ipi = ipi;
+
+- per_cpu(ipi_to_irq, cpu)[ipi] = irq;
++ per_cpu(ipi_to_irq, cpu)[ipi] = info->irq;
+ per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn;
+
+- return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
++ return xen_irq_info_common_setup(info, IRQT_IPI, evtchn, 0);
+ }
+
+-static int xen_irq_info_virq_setup(unsigned cpu,
+- unsigned irq,
+- evtchn_port_t evtchn,
+- unsigned virq)
++static int xen_irq_info_virq_setup(struct irq_info *info, unsigned int cpu,
++ evtchn_port_t evtchn, unsigned int virq)
+ {
+- struct irq_info *info = info_for_irq(irq);
+-
+ info->u.virq = virq;
+
+- per_cpu(virq_to_irq, cpu)[virq] = irq;
++ per_cpu(virq_to_irq, cpu)[virq] = info->irq;
+
+- return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
++ return xen_irq_info_common_setup(info, IRQT_VIRQ, evtchn, 0);
+ }
+
+-static int xen_irq_info_pirq_setup(unsigned irq,
+- evtchn_port_t evtchn,
+- unsigned pirq,
+- unsigned gsi,
+- uint16_t domid,
+- unsigned char flags)
++static int xen_irq_info_pirq_setup(struct irq_info *info, evtchn_port_t evtchn,
++ unsigned int pirq, unsigned int gsi,
++ uint16_t domid, unsigned char flags)
+ {
+- struct irq_info *info = info_for_irq(irq);
+-
+ info->u.pirq.pirq = pirq;
+ info->u.pirq.gsi = gsi;
+ info->u.pirq.domid = domid;
+ info->u.pirq.flags = flags;
+
+- return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
++ return xen_irq_info_common_setup(info, IRQT_PIRQ, evtchn, 0);
+ }
+
+ static void xen_irq_info_cleanup(struct irq_info *info)
+@@ -453,20 +438,16 @@ int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
+ return irq;
+ }
+
+-static enum ipi_vector ipi_from_irq(unsigned irq)
++static enum ipi_vector ipi_from_irq(struct irq_info *info)
+ {
+- struct irq_info *info = info_for_irq(irq);
+-
+ BUG_ON(info == NULL);
+ BUG_ON(info->type != IRQT_IPI);
+
+ return info->u.ipi;
+ }
+
+-static unsigned virq_from_irq(unsigned irq)
++static unsigned int virq_from_irq(struct irq_info *info)
+ {
+- struct irq_info *info = info_for_irq(irq);
+-
+ BUG_ON(info == NULL);
+ BUG_ON(info->type != IRQT_VIRQ);
+
+@@ -533,13 +514,9 @@ static bool pirq_needs_eoi_flag(unsigned irq)
+ return info->u.pirq.flags & PIRQ_NEEDS_EOI;
+ }
+
+-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
++static void bind_evtchn_to_cpu(struct irq_info *info, unsigned int cpu,
+ bool force_affinity)
+ {
+- struct irq_info *info = evtchn_to_info(evtchn);
+-
+- BUG_ON(info == NULL);
+-
+ if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+ struct irq_data *data = irq_get_irq_data(info->irq);
+
+@@ -547,7 +524,7 @@ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ irq_data_update_effective_affinity(data, cpumask_of(cpu));
+ }
+
+- xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
++ xen_evtchn_port_bind_to_cpu(info->evtchn, cpu, info->cpu);
+
+ channels_on_cpu_dec(info);
+ info->cpu = cpu;
+@@ -762,23 +739,24 @@ static struct irq_info *xen_irq_init(unsigned int irq)
+ return info;
+ }
+
+-static inline int __must_check xen_allocate_irq_dynamic(void)
++static struct irq_info *xen_allocate_irq_dynamic(void)
+ {
+ int irq = irq_alloc_desc_from(0, -1);
++ struct irq_info *info = NULL;
+
+ if (irq >= 0) {
+- if (!xen_irq_init(irq)) {
++ info = xen_irq_init(irq);
++ if (!info)
+ xen_irq_free_desc(irq);
+- irq = -1;
+- }
+ }
+
+- return irq;
++ return info;
+ }
+
+-static int __must_check xen_allocate_irq_gsi(unsigned gsi)
++static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi)
+ {
+ int irq;
++ struct irq_info *info;
+
+ /*
+ * A PV guest has no concept of a GSI (since it has no ACPI
+@@ -795,18 +773,15 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
+ else
+ irq = irq_alloc_desc_at(gsi, -1);
+
+- if (!xen_irq_init(irq)) {
++ info = xen_irq_init(irq);
++ if (!info)
+ xen_irq_free_desc(irq);
+- irq = -1;
+- }
+
+- return irq;
++ return info;
+ }
+
+-static void xen_free_irq(unsigned irq)
++static void xen_free_irq(struct irq_info *info)
+ {
+- struct irq_info *info = info_for_irq(irq);
+-
+ if (WARN_ON(!info))
+ return;
+
+@@ -897,7 +872,7 @@ static unsigned int __startup_pirq(unsigned int irq)
+ goto err;
+
+ info->evtchn = evtchn;
+- bind_evtchn_to_cpu(evtchn, 0, false);
++ bind_evtchn_to_cpu(info, 0, false);
+
+ rc = xen_evtchn_port_setup(evtchn);
+ if (rc)
+@@ -963,10 +938,9 @@ int xen_irq_from_gsi(unsigned gsi)
+ }
+ EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
+
+-static void __unbind_from_irq(unsigned int irq)
++static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
+ {
+- evtchn_port_t evtchn = evtchn_from_irq(irq);
+- struct irq_info *info = info_for_irq(irq);
++ evtchn_port_t evtchn;
+
+ if (!info) {
+ xen_irq_free_desc(irq);
+@@ -979,6 +953,8 @@ static void __unbind_from_irq(unsigned int irq)
+ return;
+ }
+
++ evtchn = info->evtchn;
++
+ if (VALID_EVTCHN(evtchn)) {
+ unsigned int cpu = info->cpu;
+ struct xenbus_device *dev;
+@@ -988,11 +964,11 @@ static void __unbind_from_irq(unsigned int irq)
+
+ switch (info->type) {
+ case IRQT_VIRQ:
+- per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
++ per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1;
+ break;
+ case IRQT_IPI:
+- per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
+- per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(irq)] = 0;
++ per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1;
++ per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0;
+ break;
+ case IRQT_EVTCHN:
+ dev = info->u.interdomain;
+@@ -1006,7 +982,7 @@ static void __unbind_from_irq(unsigned int irq)
+ xen_irq_info_cleanup(info);
+ }
+
+- xen_free_irq(irq);
++ xen_free_irq(info);
+ }
+
+ /*
+@@ -1022,24 +998,24 @@ static void __unbind_from_irq(unsigned int irq)
+ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
+ unsigned pirq, int shareable, char *name)
+ {
+- int irq;
++ struct irq_info *info;
+ struct physdev_irq irq_op;
+ int ret;
+
+ mutex_lock(&irq_mapping_update_lock);
+
+- irq = xen_irq_from_gsi(gsi);
+- if (irq != -1) {
++ ret = xen_irq_from_gsi(gsi);
++ if (ret != -1) {
+ pr_info("%s: returning irq %d for gsi %u\n",
+- __func__, irq, gsi);
++ __func__, ret, gsi);
+ goto out;
+ }
+
+- irq = xen_allocate_irq_gsi(gsi);
+- if (irq < 0)
++ info = xen_allocate_irq_gsi(gsi);
++ if (!info)
+ goto out;
+
+- irq_op.irq = irq;
++ irq_op.irq = info->irq;
+ irq_op.vector = 0;
+
+ /* Only the privileged domain can do this. For non-priv, the pcifront
+@@ -1047,20 +1023,19 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
+ * this in the priv domain. */
+ if (xen_initial_domain() &&
+ HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+- xen_free_irq(irq);
+- irq = -ENOSPC;
++ xen_free_irq(info);
++ ret = -ENOSPC;
+ goto out;
+ }
+
+- ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
++ ret = xen_irq_info_pirq_setup(info, 0, pirq, gsi, DOMID_SELF,
+ shareable ? PIRQ_SHAREABLE : 0);
+ if (ret < 0) {
+- __unbind_from_irq(irq);
+- irq = ret;
++ __unbind_from_irq(info, info->irq);
+ goto out;
+ }
+
+- pirq_query_unmask(irq);
++ pirq_query_unmask(info->irq);
+ /* We try to use the handler with the appropriate semantic for the
+ * type of interrupt: if the interrupt is an edge triggered
+ * interrupt we use handle_edge_irq.
+@@ -1077,16 +1052,18 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
+ * is the right choice either way.
+ */
+ if (shareable)
+- irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
++ irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
+ handle_fasteoi_irq, name);
+ else
+- irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
++ irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
+ handle_edge_irq, name);
+
++ ret = info->irq;
++
+ out:
+ mutex_unlock(&irq_mapping_update_lock);
+
+- return irq;
++ return ret;
+ }
+
+ #ifdef CONFIG_PCI_MSI
+@@ -1108,6 +1085,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ int pirq, int nvec, const char *name, domid_t domid)
+ {
+ int i, irq, ret;
++ struct irq_info *info;
+
+ mutex_lock(&irq_mapping_update_lock);
+
+@@ -1116,12 +1094,13 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ goto out;
+
+ for (i = 0; i < nvec; i++) {
+- if (!xen_irq_init(irq + i))
++ info = xen_irq_init(irq + i);
++ if (!info)
+ goto error_irq;
+
+ irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
+
+- ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
++ ret = xen_irq_info_pirq_setup(info, 0, pirq + i, 0, domid,
+ i == 0 ? 0 : PIRQ_MSI_GROUP);
+ if (ret < 0)
+ goto error_irq;
+@@ -1133,9 +1112,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ out:
+ mutex_unlock(&irq_mapping_update_lock);
+ return irq;
++
+ error_irq:
+- while (nvec--)
+- __unbind_from_irq(irq + nvec);
++ while (nvec--) {
++ info = info_for_irq(irq + nvec);
++ __unbind_from_irq(info, irq + nvec);
++ }
+ mutex_unlock(&irq_mapping_update_lock);
+ return ret;
+ }
+@@ -1171,7 +1153,7 @@ int xen_destroy_irq(int irq)
+ }
+ }
+
+- xen_free_irq(irq);
++ xen_free_irq(info);
+
+ out:
+ mutex_unlock(&irq_mapping_update_lock);
+@@ -1210,8 +1192,7 @@ EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
+ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+ struct xenbus_device *dev)
+ {
+- int irq;
+- int ret;
++ int ret = -ENOMEM;
+ struct irq_info *info;
+
+ if (evtchn >= xen_evtchn_max_channels())
+@@ -1222,17 +1203,16 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+ info = evtchn_to_info(evtchn);
+
+ if (!info) {
+- irq = xen_allocate_irq_dynamic();
+- if (irq < 0)
++ info = xen_allocate_irq_dynamic();
++ if (!info)
+ goto out;
+
+- irq_set_chip_and_handler_name(irq, chip,
++ irq_set_chip_and_handler_name(info->irq, chip,
+ handle_edge_irq, "event");
+
+- ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
++ ret = xen_irq_info_evtchn_setup(info, evtchn, dev);
+ if (ret < 0) {
+- __unbind_from_irq(irq);
+- irq = ret;
++ __unbind_from_irq(info, info->irq);
+ goto out;
+ }
+ /*
+@@ -1242,17 +1222,17 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+ * affinity setting is not invoked on them so nothing would
+ * bind the channel.
+ */
+- bind_evtchn_to_cpu(evtchn, 0, false);
+- } else {
+- if (!WARN_ON(info->type != IRQT_EVTCHN))
+- info->refcnt++;
+- irq = info->irq;
++ bind_evtchn_to_cpu(info, 0, false);
++ } else if (!WARN_ON(info->type != IRQT_EVTCHN)) {
++ info->refcnt++;
+ }
+
++ ret = info->irq;
++
+ out:
+ mutex_unlock(&irq_mapping_update_lock);
+
+- return irq;
++ return ret;
+ }
+
+ int bind_evtchn_to_irq(evtchn_port_t evtchn)
+@@ -1271,18 +1251,19 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ {
+ struct evtchn_bind_ipi bind_ipi;
+ evtchn_port_t evtchn;
+- int ret, irq;
++ struct irq_info *info;
++ int ret;
+
+ mutex_lock(&irq_mapping_update_lock);
+
+- irq = per_cpu(ipi_to_irq, cpu)[ipi];
++ ret = per_cpu(ipi_to_irq, cpu)[ipi];
+
+- if (irq == -1) {
+- irq = xen_allocate_irq_dynamic();
+- if (irq < 0)
++ if (ret == -1) {
++ info = xen_allocate_irq_dynamic();
++ if (!info)
+ goto out;
+
+- irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
++ irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
+ handle_percpu_irq, "ipi");
+
+ bind_ipi.vcpu = xen_vcpu_nr(cpu);
+@@ -1291,25 +1272,25 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ BUG();
+ evtchn = bind_ipi.port;
+
+- ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
++ ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
+ if (ret < 0) {
+- __unbind_from_irq(irq);
+- irq = ret;
++ __unbind_from_irq(info, info->irq);
+ goto out;
+ }
+ /*
+ * Force the affinity mask to the target CPU so proc shows
+ * the correct target.
+ */
+- bind_evtchn_to_cpu(evtchn, cpu, true);
++ bind_evtchn_to_cpu(info, cpu, true);
++ ret = info->irq;
+ } else {
+- struct irq_info *info = info_for_irq(irq);
++ info = info_for_irq(ret);
+ WARN_ON(info == NULL || info->type != IRQT_IPI);
+ }
+
+ out:
+ mutex_unlock(&irq_mapping_update_lock);
+- return irq;
++ return ret;
+ }
+
+ static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
+@@ -1377,22 +1358,23 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
+ {
+ struct evtchn_bind_virq bind_virq;
+ evtchn_port_t evtchn = 0;
+- int irq, ret;
++ struct irq_info *info;
++ int ret;
+
+ mutex_lock(&irq_mapping_update_lock);
+
+- irq = per_cpu(virq_to_irq, cpu)[virq];
++ ret = per_cpu(virq_to_irq, cpu)[virq];
+
+- if (irq == -1) {
+- irq = xen_allocate_irq_dynamic();
+- if (irq < 0)
++ if (ret == -1) {
++ info = xen_allocate_irq_dynamic();
++ if (!info)
+ goto out;
+
+ if (percpu)
+- irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
++ irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
+ handle_percpu_irq, "virq");
+ else
+- irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
++ irq_set_chip_and_handler_name(info->irq, &xen_dynamic_chip,
+ handle_edge_irq, "virq");
+
+ bind_virq.virq = virq;
+@@ -1407,10 +1389,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
+ BUG_ON(ret < 0);
+ }
+
+- ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
++ ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);
+ if (ret < 0) {
+- __unbind_from_irq(irq);
+- irq = ret;
++ __unbind_from_irq(info, info->irq);
+ goto out;
+ }
+
+@@ -1418,22 +1399,26 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
+ * Force the affinity mask for percpu interrupts so proc
+ * shows the correct target.
+ */
+- bind_evtchn_to_cpu(evtchn, cpu, percpu);
++ bind_evtchn_to_cpu(info, cpu, percpu);
++ ret = info->irq;
+ } else {
+- struct irq_info *info = info_for_irq(irq);
++ info = info_for_irq(ret);
+ WARN_ON(info == NULL || info->type != IRQT_VIRQ);
+ }
+
+ out:
+ mutex_unlock(&irq_mapping_update_lock);
+
+- return irq;
++ return ret;
+ }
+
+ static void unbind_from_irq(unsigned int irq)
+ {
++ struct irq_info *info;
++
+ mutex_lock(&irq_mapping_update_lock);
+- __unbind_from_irq(irq);
++ info = info_for_irq(irq);
++ __unbind_from_irq(info, irq);
+ mutex_unlock(&irq_mapping_update_lock);
+ }
+
+@@ -1767,11 +1752,11 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
+ BUG_ON(info->type == IRQT_UNBOUND);
+
+ info->irq = irq;
+- (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
++ (void)xen_irq_info_evtchn_setup(info, evtchn, NULL);
+
+ mutex_unlock(&irq_mapping_update_lock);
+
+- bind_evtchn_to_cpu(evtchn, info->cpu, false);
++ bind_evtchn_to_cpu(info, info->cpu, false);
+
+ /* Unmask the event channel. */
+ enable_irq(irq);
+@@ -1805,7 +1790,7 @@ static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
+ * it, but don't do the xenlinux-level rebind in that case.
+ */
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
+- bind_evtchn_to_cpu(evtchn, tcpu, false);
++ bind_evtchn_to_cpu(info, tcpu, false);
+
+ do_unmask(info, EVT_MASK_REASON_TEMPORARY);
+
+@@ -1956,7 +1941,7 @@ static void restore_pirqs(void)
+ if (rc) {
+ pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
+ gsi, irq, pirq, rc);
+- xen_free_irq(irq);
++ xen_free_irq(info);
+ continue;
+ }
+
+@@ -1970,13 +1955,15 @@ static void restore_cpu_virqs(unsigned int cpu)
+ {
+ struct evtchn_bind_virq bind_virq;
+ evtchn_port_t evtchn;
++ struct irq_info *info;
+ int virq, irq;
+
+ for (virq = 0; virq < NR_VIRQS; virq++) {
+ if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
+ continue;
++ info = info_for_irq(irq);
+
+- BUG_ON(virq_from_irq(irq) != virq);
++ BUG_ON(virq_from_irq(info) != virq);
+
+ /* Get a new binding from Xen. */
+ bind_virq.virq = virq;
+@@ -1987,9 +1974,9 @@ static void restore_cpu_virqs(unsigned int cpu)
+ evtchn = bind_virq.port;
+
+ /* Record the new mapping. */
+- (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
++ xen_irq_info_virq_setup(info, cpu, evtchn, virq);
+ /* The affinity mask is still valid */
+- bind_evtchn_to_cpu(evtchn, cpu, false);
++ bind_evtchn_to_cpu(info, cpu, false);
+ }
+ }
+
+@@ -1997,13 +1984,15 @@ static void restore_cpu_ipis(unsigned int cpu)
+ {
+ struct evtchn_bind_ipi bind_ipi;
+ evtchn_port_t evtchn;
++ struct irq_info *info;
+ int ipi, irq;
+
+ for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
+ if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
+ continue;
++ info = info_for_irq(irq);
+
+- BUG_ON(ipi_from_irq(irq) != ipi);
++ BUG_ON(ipi_from_irq(info) != ipi);
+
+ /* Get a new binding from Xen. */
+ bind_ipi.vcpu = xen_vcpu_nr(cpu);
+@@ -2013,9 +2002,9 @@ static void restore_cpu_ipis(unsigned int cpu)
+ evtchn = bind_ipi.port;
+
+ /* Record the new mapping. */
+- (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
++ xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
+ /* The affinity mask is still valid */
+- bind_evtchn_to_cpu(evtchn, cpu, false);
++ bind_evtchn_to_cpu(info, cpu, false);
+ }
+ }
+
+--
+2.43.0
+
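The shape of the refactoring is the same across the whole diff above and can be reduced to a hypothetical pair of helpers: the caller resolves the irq_info once and passes the pointer down, instead of each level repeating the info_for_irq() lookup.

/* Before: every level re-derives the info from the IRQ number. */
static void helper_old(unsigned int irq)
{
	struct irq_info *info = info_for_irq(irq);	/* repeated lookup */

	do_mask(info, EVT_MASK_REASON_EXPLICIT);
}

/* After: the already-resolved pointer travels down the call chain. */
static void helper_new(struct irq_info *info)
{
	do_mask(info, EVT_MASK_REASON_EXPLICIT);	/* no lookup needed */
}
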
--- /dev/null
+From 66c0b706368143fac2552a9d254d6ab6073a7243 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Sep 2023 08:58:05 +0200
+Subject: xen/events: reduce externally visible helper functions
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 686464514fbebb6c8de4415238319e414c3500a4 ]
+
+get_evtchn_to_irq() has only one external user while irq_from_evtchn()
+provides the same functionality and is exported for a wider user base.
+Modify the only external user of get_evtchn_to_irq() to use
+irq_from_evtchn() instead and make get_evtchn_to_irq() static.
+
+evtchn_from_irq() and irq_from_virq() have a single external user and
+can easily be combined into a new helper irq_evtchn_from_virq(), allowing
+irq_from_virq() to be dropped and evtchn_from_irq() to be made static.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_2l.c | 8 ++++----
+ drivers/xen/events/events_base.c | 13 +++++++++----
+ drivers/xen/events/events_internal.h | 1 -
+ include/xen/events.h | 4 ++--
+ 4 files changed, 15 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
+index b8f2f971c2f0f..e3585330cf98b 100644
+--- a/drivers/xen/events/events_2l.c
++++ b/drivers/xen/events/events_2l.c
+@@ -171,11 +171,11 @@ static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
+ int i;
+ struct shared_info *s = HYPERVISOR_shared_info;
+ struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
++ evtchn_port_t evtchn;
+
+ /* Timer interrupt has highest priority. */
+- irq = irq_from_virq(cpu, VIRQ_TIMER);
++ irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn);
+ if (irq != -1) {
+- evtchn_port_t evtchn = evtchn_from_irq(irq);
+ word_idx = evtchn / BITS_PER_LONG;
+ bit_idx = evtchn % BITS_PER_LONG;
+ if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx))
+@@ -328,9 +328,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+ for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
+ if (sync_test_bit(i, BM(sh->evtchn_pending))) {
+ int word_idx = i / BITS_PER_EVTCHN_WORD;
+- printk(" %d: event %d -> irq %d%s%s%s\n",
++ printk(" %d: event %d -> irq %u%s%s%s\n",
+ cpu_from_evtchn(i), i,
+- get_evtchn_to_irq(i),
++ irq_from_evtchn(i),
+ sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
+ ? "" : " l2-clear",
+ !sync_test_bit(i, BM(sh->evtchn_mask))
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index cd33a418344a8..57dfb512cdc5d 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -248,7 +248,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
+ return 0;
+ }
+
+-int get_evtchn_to_irq(evtchn_port_t evtchn)
++static int get_evtchn_to_irq(evtchn_port_t evtchn)
+ {
+ if (evtchn >= xen_evtchn_max_channels())
+ return -1;
+@@ -415,7 +415,7 @@ static void xen_irq_info_cleanup(struct irq_info *info)
+ /*
+ * Accessors for packed IRQ information.
+ */
+-evtchn_port_t evtchn_from_irq(unsigned irq)
++static evtchn_port_t evtchn_from_irq(unsigned int irq)
+ {
+ const struct irq_info *info = NULL;
+
+@@ -433,9 +433,14 @@ unsigned int irq_from_evtchn(evtchn_port_t evtchn)
+ }
+ EXPORT_SYMBOL_GPL(irq_from_evtchn);
+
+-int irq_from_virq(unsigned int cpu, unsigned int virq)
++int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
++ evtchn_port_t *evtchn)
+ {
+- return per_cpu(virq_to_irq, cpu)[virq];
++ int irq = per_cpu(virq_to_irq, cpu)[virq];
++
++ *evtchn = evtchn_from_irq(irq);
++
++ return irq;
+ }
+
+ static enum ipi_vector ipi_from_irq(unsigned irq)
+diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
+index 4d3398eff9cdf..19ae31695edcf 100644
+--- a/drivers/xen/events/events_internal.h
++++ b/drivers/xen/events/events_internal.h
+@@ -33,7 +33,6 @@ struct evtchn_ops {
+
+ extern const struct evtchn_ops *evtchn_ops;
+
+-int get_evtchn_to_irq(evtchn_port_t evtchn);
+ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
+
+ unsigned int cpu_from_evtchn(evtchn_port_t evtchn);
+diff --git a/include/xen/events.h b/include/xen/events.h
+index 23932b0673dc7..7488cd51fbf4f 100644
+--- a/include/xen/events.h
++++ b/include/xen/events.h
+@@ -101,8 +101,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout);
+
+ /* Determine the IRQ which is bound to an event channel */
+ unsigned int irq_from_evtchn(evtchn_port_t evtchn);
+-int irq_from_virq(unsigned int cpu, unsigned int virq);
+-evtchn_port_t evtchn_from_irq(unsigned irq);
++int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
++ evtchn_port_t *evtchn);
+
+ int xen_set_callback_via(uint64_t via);
+ int xen_evtchn_do_upcall(void);
+--
+2.43.0
+
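From the caller's side the combined helper looks as in the events_2l.c hunk above; a trimmed-down usage sketch (the consumer function is hypothetical):

evtchn_port_t evtchn;
int irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn);

if (irq != -1) {
	/* One call yields both the IRQ and its event channel port, so the
	 * separate evtchn_from_irq() lookup is no longer needed here.
	 */
	handle_timer_event(irq, evtchn);	/* hypothetical consumer */
}
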
--- /dev/null
+From 2cd0afe1dddd41c5b851eb85ab30a6d800c8fe04 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Sep 2023 08:24:46 +0200
+Subject: xen/events: remove some simple helpers from events_base.c
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 3bdb0ac350fe5e6301562143e4573971dd01ae0b ]
+
+The helper functions type_from_irq() and cpu_from_irq() are just
+one-line functions used only internally.
+
+Open code them where needed. At the same time, modify and rename
+get_evtchn_to_irq() to return a struct irq_info pointer instead of the
+IRQ number.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_base.c | 97 +++++++++++++-------------------
+ 1 file changed, 38 insertions(+), 59 deletions(-)
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index 57dfb512cdc5d..d3d7501628381 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -248,15 +248,6 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
+ return 0;
+ }
+
+-static int get_evtchn_to_irq(evtchn_port_t evtchn)
+-{
+- if (evtchn >= xen_evtchn_max_channels())
+- return -1;
+- if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
+- return -1;
+- return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
+-}
+-
+ /* Get info for IRQ */
+ static struct irq_info *info_for_irq(unsigned irq)
+ {
+@@ -274,6 +265,19 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info)
+ irq_set_chip_data(irq, info);
+ }
+
++static struct irq_info *evtchn_to_info(evtchn_port_t evtchn)
++{
++ int irq;
++
++ if (evtchn >= xen_evtchn_max_channels())
++ return NULL;
++ if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
++ return NULL;
++ irq = READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
++
++ return (irq < 0) ? NULL : info_for_irq(irq);
++}
++
+ /* Per CPU channel accounting */
+ static void channels_on_cpu_dec(struct irq_info *info)
+ {
+@@ -429,7 +433,9 @@ static evtchn_port_t evtchn_from_irq(unsigned int irq)
+
+ unsigned int irq_from_evtchn(evtchn_port_t evtchn)
+ {
+- return get_evtchn_to_irq(evtchn);
++ struct irq_info *info = evtchn_to_info(evtchn);
++
++ return info ? info->irq : -1;
+ }
+ EXPORT_SYMBOL_GPL(irq_from_evtchn);
+
+@@ -473,25 +479,11 @@ static unsigned pirq_from_irq(unsigned irq)
+ return info->u.pirq.pirq;
+ }
+
+-static enum xen_irq_type type_from_irq(unsigned irq)
+-{
+- return info_for_irq(irq)->type;
+-}
+-
+-static unsigned cpu_from_irq(unsigned irq)
+-{
+- return info_for_irq(irq)->cpu;
+-}
+-
+ unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
+ {
+- int irq = get_evtchn_to_irq(evtchn);
+- unsigned ret = 0;
+-
+- if (irq != -1)
+- ret = cpu_from_irq(irq);
++ struct irq_info *info = evtchn_to_info(evtchn);
+
+- return ret;
++ return info ? info->cpu : 0;
+ }
+
+ static void do_mask(struct irq_info *info, u8 reason)
+@@ -540,13 +532,12 @@ static bool pirq_needs_eoi_flag(unsigned irq)
+ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ bool force_affinity)
+ {
+- int irq = get_evtchn_to_irq(evtchn);
+- struct irq_info *info = info_for_irq(irq);
++ struct irq_info *info = evtchn_to_info(evtchn);
+
+- BUG_ON(irq == -1);
++ BUG_ON(info == NULL);
+
+ if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+- struct irq_data *data = irq_get_irq_data(irq);
++ struct irq_data *data = irq_get_irq_data(info->irq);
+
+ irq_data_update_affinity(data, cpumask_of(cpu));
+ irq_data_update_effective_affinity(data, cpumask_of(cpu));
+@@ -979,13 +970,13 @@ static void __unbind_from_irq(unsigned int irq)
+ }
+
+ if (VALID_EVTCHN(evtchn)) {
+- unsigned int cpu = cpu_from_irq(irq);
++ unsigned int cpu = info->cpu;
+ struct xenbus_device *dev;
+
+ if (!info->is_static)
+ xen_evtchn_close(evtchn);
+
+- switch (type_from_irq(irq)) {
++ switch (info->type) {
+ case IRQT_VIRQ:
+ per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
+ break;
+@@ -1208,15 +1199,16 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+ {
+ int irq;
+ int ret;
++ struct irq_info *info;
+
+ if (evtchn >= xen_evtchn_max_channels())
+ return -ENOMEM;
+
+ mutex_lock(&irq_mapping_update_lock);
+
+- irq = get_evtchn_to_irq(evtchn);
++ info = evtchn_to_info(evtchn);
+
+- if (irq == -1) {
++ if (!info) {
+ irq = xen_allocate_irq_dynamic();
+ if (irq < 0)
+ goto out;
+@@ -1239,9 +1231,9 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+ */
+ bind_evtchn_to_cpu(evtchn, 0, false);
+ } else {
+- struct irq_info *info = info_for_irq(irq);
+- if (!WARN_ON(!info || info->type != IRQT_EVTCHN))
++ if (!WARN_ON(info->type != IRQT_EVTCHN))
+ info->refcnt++;
++ irq = info->irq;
+ }
+
+ out:
+@@ -1579,13 +1571,7 @@ EXPORT_SYMBOL_GPL(xen_set_irq_priority);
+
+ int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
+ {
+- int irq = get_evtchn_to_irq(evtchn);
+- struct irq_info *info;
+-
+- if (irq == -1)
+- return -ENOENT;
+-
+- info = info_for_irq(irq);
++ struct irq_info *info = evtchn_to_info(evtchn);
+
+ if (!info)
+ return -ENOENT;
+@@ -1601,7 +1587,6 @@ EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
+
+ int evtchn_get(evtchn_port_t evtchn)
+ {
+- int irq;
+ struct irq_info *info;
+ int err = -ENOENT;
+
+@@ -1610,11 +1595,7 @@ int evtchn_get(evtchn_port_t evtchn)
+
+ mutex_lock(&irq_mapping_update_lock);
+
+- irq = get_evtchn_to_irq(evtchn);
+- if (irq == -1)
+- goto done;
+-
+- info = info_for_irq(irq);
++ info = evtchn_to_info(evtchn);
+
+ if (!info)
+ goto done;
+@@ -1634,10 +1615,11 @@ EXPORT_SYMBOL_GPL(evtchn_get);
+
+ void evtchn_put(evtchn_port_t evtchn)
+ {
+- int irq = get_evtchn_to_irq(evtchn);
+- if (WARN_ON(irq == -1))
++ struct irq_info *info = evtchn_to_info(evtchn);
++
++ if (WARN_ON(!info))
+ return;
+- unbind_from_irq(irq);
++ unbind_from_irq(info->irq);
+ }
+ EXPORT_SYMBOL_GPL(evtchn_put);
+
+@@ -1667,12 +1649,10 @@ struct evtchn_loop_ctrl {
+
+ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+ {
+- int irq;
+- struct irq_info *info;
++ struct irq_info *info = evtchn_to_info(port);
+ struct xenbus_device *dev;
+
+- irq = get_evtchn_to_irq(port);
+- if (irq == -1)
++ if (!info)
+ return;
+
+ /*
+@@ -1697,7 +1677,6 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+ }
+ }
+
+- info = info_for_irq(irq);
+ if (xchg_acquire(&info->is_active, 1))
+ return;
+
+@@ -1711,7 +1690,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+ info->eoi_time = get_jiffies_64() + event_eoi_delay;
+ }
+
+- generic_handle_irq(irq);
++ generic_handle_irq(info->irq);
+ }
+
+ int xen_evtchn_do_upcall(void)
+@@ -1769,7 +1748,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
+ mutex_lock(&irq_mapping_update_lock);
+
+ /* After resume the irq<->evtchn mappings are all cleared out */
+- BUG_ON(get_evtchn_to_irq(evtchn) != -1);
++ BUG_ON(evtchn_to_info(evtchn));
+ /* Expect irq to have been bound before,
+ so there should be a proper type */
+ BUG_ON(info->type == IRQT_UNBOUND);
+--
+2.43.0
+
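The effect on callers can be sketched as follows (the consumer function is hypothetical): lookups now return the irq_info pointer directly, so the old "irq == -1" checks become NULL checks and the per-IRQ fields are read straight off the structure.

static void example_consumer(evtchn_port_t evtchn)
{
	struct irq_info *info = evtchn_to_info(evtchn);

	if (!info)
		return;		/* port is not bound to any IRQ */

	pr_debug("evtchn %u -> irq %u on cpu %u, type %d\n",
		 evtchn, info->irq, info->cpu, info->type);
}
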
--- /dev/null
+From 861d0c8cff0a50b41cac7a7ce7ed17b274d2e406 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Oct 2023 12:41:26 +0530
+Subject: xen: evtchn: Allow shared registration of IRQ handers
+
+From: Viresh Kumar <viresh.kumar@linaro.org>
+
+[ Upstream commit 9e90e58c11b74c2bddac4b2702cf79d36b981278 ]
+
+Currently the handling of events is supported either in the kernel or
+userspace, but not both.
+
+In order to support fast delivery of interrupts from the guest to the
+backend, we need to handle the Queue Notify part of the Virtio protocol
+in the kernel and the rest in userspace.
+
+Update the interrupt handler registration flag to IRQF_SHARED for event
+channels, which allows multiple entities to bind their interrupt
+handlers to the same event channel port.
+
+Also increment the reference count of irq_info when multiple entities
+try to bind the event channel to the irqchip, so that unbinding happens
+only after all the users are gone.
+
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/99b1edfd3147c6b5d22a5139dab5861e767dc34a.1697439990.git.viresh.kumar@linaro.org
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_base.c | 3 ++-
+ drivers/xen/evtchn.c | 2 +-
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index c50419638ac0a..cd33a418344a8 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -1235,7 +1235,8 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+ bind_evtchn_to_cpu(evtchn, 0, false);
+ } else {
+ struct irq_info *info = info_for_irq(irq);
+- WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
++ if (!WARN_ON(!info || info->type != IRQT_EVTCHN))
++ info->refcnt++;
+ }
+
+ out:
+diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
+index 9139a7364df53..59717628ca42b 100644
+--- a/drivers/xen/evtchn.c
++++ b/drivers/xen/evtchn.c
+@@ -397,7 +397,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port,
+ if (rc < 0)
+ goto err;
+
+- rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
++ rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, IRQF_SHARED,
+ u->name, evtchn);
+ if (rc < 0)
+ goto err;
+--
+2.43.0
+
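With IRQF_SHARED in place, two users can attach handlers to the same event channel port, while the irq_info reference count keeps the binding alive until the last of them unbinds. A minimal sketch of such a pairing (the kernel-side handler, its name, and its cookie are hypothetical; the second call is the one visible in the evtchn.c hunk above):

/* Kernel-side consumer, e.g. a virtio backend fast path. */
rc = bind_evtchn_to_irqhandler_lateeoi(port, queue_notify_handler,
				       IRQF_SHARED, "queue-notify", vdev);
if (rc < 0)
	return rc;

/* Userspace path via /dev/xen/evtchn, as registered by evtchn_bind_to_user(). */
rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt,
				       IRQF_SHARED, u->name, evtchn);
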