git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.6
author Sasha Levin <sashal@kernel.org>
Sat, 24 Feb 2024 03:37:43 +0000 (22:37 -0500)
committer Sasha Levin <sashal@kernel.org>
Sat, 24 Feb 2024 03:37:43 +0000 (22:37 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
14 files changed:
queue-6.6/drm-amd-display-add-dpia-display-mode-validation-log.patch [new file with mode: 0644]
queue-6.6/drm-amd-display-fixed-integer-types-and-null-check-l.patch [new file with mode: 0644]
queue-6.6/drm-amd-display-request-usb4-bw-for-mst-streams.patch [new file with mode: 0644]
queue-6.6/mptcp-add-currestab-mib-counter-support.patch [new file with mode: 0644]
queue-6.6/mptcp-corner-case-locking-for-rx-path-fields-initial.patch [new file with mode: 0644]
queue-6.6/mptcp-fix-more-tx-path-fields-initialization.patch [new file with mode: 0644]
queue-6.6/mptcp-use-mptcp_set_state.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/xen-events-close-evtchn-after-mapping-cleanup.patch [new file with mode: 0644]
queue-6.6/xen-events-drop-xen_allocate_irqs_dynamic.patch [new file with mode: 0644]
queue-6.6/xen-events-modify-internal-un-bind-interfaces.patch [new file with mode: 0644]
queue-6.6/xen-events-reduce-externally-visible-helper-function.patch [new file with mode: 0644]
queue-6.6/xen-events-remove-some-simple-helpers-from-events_ba.patch [new file with mode: 0644]
queue-6.6/xen-evtchn-allow-shared-registration-of-irq-handers.patch [new file with mode: 0644]

diff --git a/queue-6.6/drm-amd-display-add-dpia-display-mode-validation-log.patch b/queue-6.6/drm-amd-display-add-dpia-display-mode-validation-log.patch
new file mode 100644 (file)
index 0000000..cbac0ae
--- /dev/null
@@ -0,0 +1,313 @@
+From aa956140b0a3c9d836aef216458ff3539ecc3e74 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Dec 2023 00:01:15 -0500
+Subject: drm/amd/display: Add dpia display mode validation logic
+
+From: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
+
+[ Upstream commit 59f1622a5f05d948a7c665a458a3dd76ba73015e ]
+
+[Why]
+If the bandwidth allocation feature is enabled, the connection manager
+won't limit the dp tunnel bandwidth. So display mode validation is
+needed for streams on dpia links to avoid oversubscription of dp
+tunnel bandwidth.
+
+[How]
+- Read the non-reduced link rate and lane count and update the
+  reported link capability.
+- Calculate the bandwidth required for the streams on dpia links
+  per host router and validate it against the bandwidth allocated
+  to that host router.
+
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Reviewed-by: PeiChen Huang <peichen.huang@amd.com>
+Reviewed-by: Aric Cyr <aric.cyr@amd.com>
+Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
+Signed-off-by: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 0484e05d048b ("drm/amd/display: fixed integer types and null check locations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../drm/amd/display/dc/core/dc_link_exports.c |   2 +-
+ drivers/gpu/drm/amd/display/dc/dc.h           |   4 +-
+ drivers/gpu/drm/amd/display/dc/dc_dp_types.h  |   6 +
+ drivers/gpu/drm/amd/display/dc/dc_types.h     |   2 +
+ .../dc/link/protocols/link_dp_dpia_bw.c       | 130 +++++++++++++-----
+ 5 files changed, 104 insertions(+), 40 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+index ed94187c2afa2..f365773d57148 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+@@ -497,7 +497,7 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable)
+       link->dc->link_srv->enable_hpd_filter(link, enable);
+ }
+-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
++bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
+ {
+       return dc->link_srv->validate_dpia_bandwidth(streams, count);
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
+index 3f33740e2f659..5f2eac868b747 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc.h
++++ b/drivers/gpu/drm/amd/display/dc/dc.h
+@@ -2116,11 +2116,11 @@ int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
+  *
+  * @dc: pointer to dc struct
+  * @stream: pointer to all possible streams
+- * @num_streams: number of valid DPIA streams
++ * @count: number of valid DPIA streams
+  *
+  * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE
+  */
+-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams,
++bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams,
+               const unsigned int count);
+ /* Sink Interfaces - A sink corresponds to a display output device */
+diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+index cfaa39c5dd16b..83719f5bea495 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
++++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+@@ -1433,6 +1433,12 @@ struct dp_trace {
+ #ifndef DP_TUNNELING_STATUS
+ #define DP_TUNNELING_STATUS                           0xE0025 /* 1.4a */
+ #endif
++#ifndef DP_TUNNELING_MAX_LINK_RATE
++#define DP_TUNNELING_MAX_LINK_RATE                    0xE0028 /* 1.4a */
++#endif
++#ifndef DP_TUNNELING_MAX_LANE_COUNT
++#define DP_TUNNELING_MAX_LANE_COUNT                   0xE0029 /* 1.4a */
++#endif
+ #ifndef DPTX_BW_ALLOCATION_MODE_CONTROL
+ #define DPTX_BW_ALLOCATION_MODE_CONTROL                       0xE0030 /* 1.4a */
+ #endif
+diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
+index accffba5a6834..19b7314811ae2 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
++++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
+@@ -1121,6 +1121,8 @@ struct dc_dpia_bw_alloc {
+       int bw_granularity;    // BW Granularity
+       bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3:  DP-Tx & Dpia & CM
+       bool response_ready;   // Response ready from the CM side
++      uint8_t nrd_max_lane_count; // Non-reduced max lane count
++      uint8_t nrd_max_link_rate; // Non-reduced max link rate
+ };
+ #define MAX_SINKS_PER_LINK 4
+diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+index d6e1f969bfd52..a7aa8c9da868f 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
++++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+@@ -59,6 +59,7 @@ static void reset_bw_alloc_struct(struct dc_link *link)
+       link->dpia_bw_alloc_config.estimated_bw = 0;
+       link->dpia_bw_alloc_config.bw_granularity = 0;
+       link->dpia_bw_alloc_config.response_ready = false;
++      link->dpia_bw_alloc_config.sink_allocated_bw = 0;
+ }
+ #define BW_GRANULARITY_0 4 // 0.25 Gbps
+@@ -104,6 +105,32 @@ static int get_estimated_bw(struct dc_link *link)
+       return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+ }
++static int get_non_reduced_max_link_rate(struct dc_link *link)
++{
++      uint8_t nrd_max_link_rate = 0;
++
++      core_link_read_dpcd(
++                      link,
++                      DP_TUNNELING_MAX_LINK_RATE,
++                      &nrd_max_link_rate,
++                      sizeof(uint8_t));
++
++      return nrd_max_link_rate;
++}
++
++static int get_non_reduced_max_lane_count(struct dc_link *link)
++{
++      uint8_t nrd_max_lane_count = 0;
++
++      core_link_read_dpcd(
++                      link,
++                      DP_TUNNELING_MAX_LANE_COUNT,
++                      &nrd_max_lane_count,
++                      sizeof(uint8_t));
++
++      return nrd_max_lane_count;
++}
++
+ /*
+  * Read all New BW alloc configuration ex: estimated_bw, allocated_bw,
+  * granuality, Driver_ID, CM_Group, & populate the BW allocation structs
+@@ -111,13 +138,20 @@ static int get_estimated_bw(struct dc_link *link)
+  */
+ static void init_usb4_bw_struct(struct dc_link *link)
+ {
+-      // Init the known values
++      reset_bw_alloc_struct(link);
++
++      /* init the known values */
+       link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link);
+       link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link);
++      link->dpia_bw_alloc_config.nrd_max_link_rate = get_non_reduced_max_link_rate(link);
++      link->dpia_bw_alloc_config.nrd_max_lane_count = get_non_reduced_max_lane_count(link);
+       DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n",
+               __func__, link->dpia_bw_alloc_config.bw_granularity,
+               link->dpia_bw_alloc_config.estimated_bw);
++      DC_LOG_DEBUG("%s: nrd_max_link_rate(%d), nrd_max_lane_count(%d)\n",
++              __func__, link->dpia_bw_alloc_config.nrd_max_link_rate,
++              link->dpia_bw_alloc_config.nrd_max_lane_count);
+ }
+ static uint8_t get_lowest_dpia_index(struct dc_link *link)
+@@ -142,39 +176,50 @@ static uint8_t get_lowest_dpia_index(struct dc_link *link)
+ }
+ /*
+- * Get the Max Available BW or Max Estimated BW for each Host Router
++ * Get the maximum dp tunnel bandwidth of a host router
+  *
+- * @link: pointer to the dc_link struct instance
+- * @type: ESTIMATD BW or MAX AVAILABLE BW
++ * @dc: pointer to the dc struct instance
++ * @hr_index: host router index
+  *
+- * return: response_ready flag from dc_link struct
++ * return: host router maximum dp tunnel bandwidth
+  */
+-static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
++static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_index)
+ {
+-      const struct dc *dc_struct = link->dc;
+-      uint8_t lowest_dpia_index = get_lowest_dpia_index(link);
+-      uint8_t idx = (link->link_index - lowest_dpia_index) / 2, idx_temp = 0;
+-      struct dc_link *link_temp;
++      uint8_t lowest_dpia_index = get_lowest_dpia_index(dc->links[0]);
++      uint8_t hr_index_temp = 0;
++      struct dc_link *link_dpia_primary, *link_dpia_secondary;
+       int total_bw = 0;
+-      int i;
+-
+-      for (i = 0; i < MAX_PIPES * 2; ++i) {
+-              if (!dc_struct->links[i] || dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
+-                      continue;
++      for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) {
+-              link_temp = dc_struct->links[i];
+-              if (!link_temp || !link_temp->hpd_status)
++              if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
+                       continue;
+-              idx_temp = (link_temp->link_index - lowest_dpia_index) / 2;
+-
+-              if (idx_temp == idx) {
+-
+-                      if (type == HOST_ROUTER_BW_ESTIMATED)
+-                              total_bw += link_temp->dpia_bw_alloc_config.estimated_bw;
+-                      else if (type == HOST_ROUTER_BW_ALLOCATED)
+-                              total_bw += link_temp->dpia_bw_alloc_config.sink_allocated_bw;
++              hr_index_temp = (dc->links[i]->link_index - lowest_dpia_index) / 2;
++
++              if (hr_index_temp == hr_index) {
++                      link_dpia_primary = dc->links[i];
++                      link_dpia_secondary = dc->links[i + 1];
++
++                      /**
++                       * If BW allocation enabled on both DPIAs, then
++                       * HR BW = Estimated(dpia_primary) + Allocated(dpia_secondary)
++                       * otherwise HR BW = Estimated(bw alloc enabled dpia)
++                       */
++                      if ((link_dpia_primary->hpd_status &&
++                              link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) &&
++                              (link_dpia_secondary->hpd_status &&
++                              link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) {
++                              total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
++                                      link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw;
++                      } else if (link_dpia_primary->hpd_status &&
++                                      link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) {
++                              total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw;
++                      } else if (link_dpia_secondary->hpd_status &&
++                              link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled) {
++                              total_bw += link_dpia_secondary->dpia_bw_alloc_config.estimated_bw;
++                      }
++                      break;
+               }
+       }
+@@ -194,7 +239,6 @@ static void dpia_bw_alloc_unplug(struct dc_link *link)
+       if (link) {
+               DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n",
+                       __func__, link->link_index);
+-              link->dpia_bw_alloc_config.sink_allocated_bw = 0;
+               reset_bw_alloc_struct(link);
+       }
+ }
+@@ -397,7 +441,7 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
+               if (!timeout)
+                       ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
+               else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
+-                      ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED);
++                      ret = link->dpia_bw_alloc_config.sink_allocated_bw;
+       }
+       //2. Cold Unplug
+       else if (!link->hpd_status)
+@@ -439,29 +483,41 @@ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r
+ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias)
+ {
+       bool ret = true;
+-      int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 };
+-      uint8_t lowest_dpia_index = 0, dpia_index = 0;
+-      uint8_t i;
++      int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }, host_router_total_dp_bw = 0;
++      uint8_t lowest_dpia_index, i, hr_index;
+       if (!num_dpias || num_dpias > MAX_DPIA_NUM)
+               return ret;
+-      //Get total Host Router BW & Validate against each Host Router max BW
++      lowest_dpia_index = get_lowest_dpia_index(link[0]);
++
++      /* get total Host Router BW with granularity for the given modes */
+       for (i = 0; i < num_dpias; ++i) {
++              int granularity_Gbps = 0;
++              int bw_granularity = 0;
+               if (!link[i]->dpia_bw_alloc_config.bw_alloc_enabled)
+                       continue;
+-              lowest_dpia_index = get_lowest_dpia_index(link[i]);
+               if (link[i]->link_index < lowest_dpia_index)
+                       continue;
+-              dpia_index = (link[i]->link_index - lowest_dpia_index) / 2;
+-              bw_needed_per_hr[dpia_index] += bw_needed_per_dpia[i];
+-              if (bw_needed_per_hr[dpia_index] > get_host_router_total_bw(link[i], HOST_ROUTER_BW_ALLOCATED)) {
++              granularity_Gbps = (Kbps_TO_Gbps / link[i]->dpia_bw_alloc_config.bw_granularity);
++              bw_granularity = (bw_needed_per_dpia[i] / granularity_Gbps) * granularity_Gbps +
++                              ((bw_needed_per_dpia[i] % granularity_Gbps) ? granularity_Gbps : 0);
+-                      ret = false;
+-                      break;
++              hr_index = (link[i]->link_index - lowest_dpia_index) / 2;
++              bw_needed_per_hr[hr_index] += bw_granularity;
++      }
++
++      /* validate against each Host Router max BW */
++      for (hr_index = 0; hr_index < MAX_HR_NUM; ++hr_index) {
++              if (bw_needed_per_hr[hr_index]) {
++                      host_router_total_dp_bw = get_host_router_total_dp_tunnel_bw(link[0]->dc, hr_index);
++                      if (bw_needed_per_hr[hr_index] > host_router_total_dp_bw) {
++                              ret = false;
++                              break;
++                      }
+               }
+       }
+-- 
+2.43.0
+
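For reference, the validation math in the hunk above rounds each DPIA's requested bandwidth up to the connection manager's allocation granularity before summing per host router. A minimal standalone sketch of that rounding, assuming Kbps_TO_Gbps is the kbps-per-Gbps constant used by the patch and with hypothetical input values:

#include <stdio.h>

#define Kbps_TO_Gbps (1000 * 1000)  /* 1 Gbps expressed in kbps */

/* Round a bandwidth request up to the next multiple of the CM
 * allocation granularity; bw_granularity encodes the divisor
 * (e.g. 4 -> 0.25 Gbps steps), mirroring dpia_validate_usb4_bw(). */
static int round_to_granularity(int bw_needed_kbps, int bw_granularity)
{
	int granularity_kbps = Kbps_TO_Gbps / bw_granularity;

	return (bw_needed_kbps / granularity_kbps) * granularity_kbps +
	       ((bw_needed_kbps % granularity_kbps) ? granularity_kbps : 0);
}

int main(void)
{
	/* a hypothetical 2.1 Gbps stream at 0.25 Gbps granularity
	 * rounds up to 2.25 Gbps */
	printf("%d\n", round_to_granularity(2100000, 4)); /* 2250000 */
	return 0;
}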
diff --git a/queue-6.6/drm-amd-display-fixed-integer-types-and-null-check-l.patch b/queue-6.6/drm-amd-display-fixed-integer-types-and-null-check-l.patch
new file mode 100644 (file)
index 0000000..810f347
--- /dev/null
@@ -0,0 +1,114 @@
+From 42d08dd1076db03268a7dc8d3e95523b099be90f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Jan 2024 16:40:37 -0500
+Subject: drm/amd/display: fixed integer types and null check locations
+
+From: Sohaib Nadeem <sohaib.nadeem@amd.com>
+
+[ Upstream commit 0484e05d048b66d01d1f3c1d2306010bb57d8738 ]
+
+[why]:
+issues fixed:
+- comparison with a wider integer type in a loop condition, which can
+  cause infinite loops
+- pointer dereference before the null check
+
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Josip Pavic <josip.pavic@amd.com>
+Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Signed-off-by: Sohaib Nadeem <sohaib.nadeem@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../gpu/drm/amd/display/dc/bios/bios_parser2.c   | 16 ++++++++++------
+ .../drm/amd/display/dc/link/link_validation.c    |  2 +-
+ 2 files changed, 11 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+index bbf2a465f400b..4c3c4c8de1cfc 100644
+--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+@@ -1860,19 +1860,21 @@ static enum bp_result get_firmware_info_v3_2(
+               /* Vega12 */
+               smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2,
+                                                       DATA_TABLES(smu_info));
+-              DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
+               if (!smu_info_v3_2)
+                       return BP_RESULT_BADBIOSTABLE;
++              DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
++
+               info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10;
+       } else if (revision.minor == 3) {
+               /* Vega20 */
+               smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3,
+                                                       DATA_TABLES(smu_info));
+-              DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
+               if (!smu_info_v3_3)
+                       return BP_RESULT_BADBIOSTABLE;
++              DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
++
+               info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10;
+       }
+@@ -2435,10 +2437,11 @@ static enum bp_result get_integrated_info_v11(
+       info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11,
+                                       DATA_TABLES(integratedsysteminfo));
+-      DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
+       if (info_v11 == NULL)
+               return BP_RESULT_BADBIOSTABLE;
++      DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
++
+       info->gpu_cap_info =
+       le32_to_cpu(info_v11->gpucapinfo);
+       /*
+@@ -2650,11 +2653,12 @@ static enum bp_result get_integrated_info_v2_1(
+       info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1,
+                                       DATA_TABLES(integratedsysteminfo));
+-      DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
+       if (info_v2_1 == NULL)
+               return BP_RESULT_BADBIOSTABLE;
++      DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
++
+       info->gpu_cap_info =
+       le32_to_cpu(info_v2_1->gpucapinfo);
+       /*
+@@ -2812,11 +2816,11 @@ static enum bp_result get_integrated_info_v2_2(
+       info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2,
+                                       DATA_TABLES(integratedsysteminfo));
+-      DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
+-
+       if (info_v2_2 == NULL)
+               return BP_RESULT_BADBIOSTABLE;
++      DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
++
+       info->gpu_cap_info =
+       le32_to_cpu(info_v2_2->gpucapinfo);
+       /*
+diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+index 8fe66c3678508..5b0bc7f6a188c 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
++++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+@@ -361,7 +361,7 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un
+       struct dc_link *dpia_link[MAX_DPIA_NUM] = {0};
+       int num_dpias = 0;
+-      for (uint8_t i = 0; i < num_streams; ++i) {
++      for (unsigned int i = 0; i < num_streams; ++i) {
+               if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) {
+                       /* new dpia sst stream, check whether it exceeds max dpia */
+                       if (num_dpias >= MAX_DPIA_NUM)
+-- 
+2.43.0
+
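For reference, the loop-condition issue fixed above comes from comparing a uint8_t counter against a wider unsigned bound: the counter wraps from 255 back to 0 before the condition can fail. A minimal sketch of the hazard and the fix, with a hypothetical bound:

#include <stdint.h>

void walk_streams(unsigned int num_streams)
{
	/* pre-patch pattern: if num_streams > 255, i wraps at 256 and
	 * i < num_streams never becomes false -> infinite loop */
	for (uint8_t i = 0; i < num_streams; ++i)
		;

	/* post-patch pattern: the counter is as wide as the bound */
	for (unsigned int i = 0; i < num_streams; ++i)
		;
}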
diff --git a/queue-6.6/drm-amd-display-request-usb4-bw-for-mst-streams.patch b/queue-6.6/drm-amd-display-request-usb4-bw-for-mst-streams.patch
new file mode 100644 (file)
index 0000000..2a870cd
--- /dev/null
@@ -0,0 +1,373 @@
+From 351be241be9a81775f7181e1bcf3cd6eabeb70a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Dec 2023 23:16:34 +0800
+Subject: drm/amd/display: Request usb4 bw for mst streams
+
+From: Peichen Huang <peichen.huang@amd.com>
+
+[ Upstream commit 5f3bce13266e6fe2f7a46f94d8bc94d5274e276b ]
+
+[WHY]
+When usb4 bandwidth allocation mode is enabled, the driver needs to
+request bandwidth from the connection manager. For an mst link, the
+requested bandwidth should be big enough for all remote streams.
+
+[HOW]
+- For an mst link, the requested bandwidth should be the sum of the
+  bandwidth of all mst streams plus the dp MTPH overhead.
+- Allocate/deallocate usb4 bandwidth when setting dpms on/off.
+- When doing display mode validation, the driver also needs to consider
+  the total bandwidth of all mst streams on the mst link.
+
+Reviewed-by: Cruise Hung <cruise.hung@amd.com>
+Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
+Signed-off-by: Peichen Huang <peichen.huang@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 0484e05d048b ("drm/amd/display: fixed integer types and null check locations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/dc_types.h     | 12 ++--
+ .../gpu/drm/amd/display/dc/link/link_dpms.c   | 42 ++++++++++---
+ .../drm/amd/display/dc/link/link_validation.c | 60 +++++++++++++++----
+ .../dc/link/protocols/link_dp_dpia_bw.c       | 59 +++++++++++++-----
+ .../dc/link/protocols/link_dp_dpia_bw.h       |  9 +++
+ 5 files changed, 144 insertions(+), 38 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
+index 19b7314811ae2..cc173ecf78e0c 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
++++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
+@@ -1110,23 +1110,25 @@ struct dc_panel_config {
+       } ilr;
+ };
++#define MAX_SINKS_PER_LINK 4
++
+ /*
+  *  USB4 DPIA BW ALLOCATION STRUCTS
+  */
+ struct dc_dpia_bw_alloc {
+-      int sink_verified_bw;  // The Verified BW that sink can allocated and use that has been verified already
+-      int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated
+-      int sink_max_bw;       // The Max BW that sink can require/support
++      int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks
++      int link_verified_bw;  // The verified BW that the link can allocate and use
++      int link_max_bw;       // The Max BW that link can require/support
++      int allocated_bw;      // The Actual Allocated BW for this DPIA
+       int estimated_bw;      // The estimated available BW for this DPIA
+       int bw_granularity;    // BW Granularity
++      int dp_overhead;       // DP overhead in dp tunneling
+       bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3:  DP-Tx & Dpia & CM
+       bool response_ready;   // Response ready from the CM side
+       uint8_t nrd_max_lane_count; // Non-reduced max lane count
+       uint8_t nrd_max_link_rate; // Non-reduced max link rate
+ };
+-#define MAX_SINKS_PER_LINK 4
+-
+ enum dc_hpd_enable_select {
+       HPD_EN_FOR_ALL_EDP = 0,
+       HPD_EN_FOR_PRIMARY_EDP_ONLY,
+diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+index b9768cd9b8a07..4901e27f678bc 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
++++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+@@ -2071,17 +2071,11 @@ static enum dc_status enable_link_dp(struct dc_state *state,
+               }
+       }
+-      /*
+-       * If the link is DP-over-USB4 do the following:
+-       * - Train with fallback when enabling DPIA link. Conventional links are
++      /* Train with fallback when enabling DPIA link. Conventional links are
+        * trained with fallback during sink detection.
+-       * - Allocate only what the stream needs for bw in Gbps. Inform the CM
+-       * in case stream needs more or less bw from what has been allocated
+-       * earlier at plug time.
+        */
+-      if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
++      if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+               do_fallback = true;
+-      }
+       /*
+        * Temporary w/a to get DP2.0 link rates to work with SST.
+@@ -2263,6 +2257,32 @@ static enum dc_status enable_link(
+       return status;
+ }
++static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, int bw)
++{
++      return true;
++}
++
++static bool allocate_usb4_bandwidth(struct dc_stream_state *stream)
++{
++      bool ret;
++
++      int bw = dc_bandwidth_in_kbps_from_timing(&stream->timing,
++                      dc_link_get_highest_encoding_format(stream->sink->link));
++
++      ret = allocate_usb4_bandwidth_for_stream(stream, bw);
++
++      return ret;
++}
++
++static bool deallocate_usb4_bandwidth(struct dc_stream_state *stream)
++{
++      bool ret;
++
++      ret = allocate_usb4_bandwidth_for_stream(stream, 0);
++
++      return ret;
++}
++
+ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
+ {
+       struct dc  *dc = pipe_ctx->stream->ctx->dc;
+@@ -2299,6 +2319,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
+       update_psp_stream_config(pipe_ctx, true);
+       dc->hwss.blank_stream(pipe_ctx);
++      if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
++              deallocate_usb4_bandwidth(pipe_ctx->stream);
++
+       if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+               deallocate_mst_payload(pipe_ctx);
+       else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
+@@ -2520,6 +2543,9 @@ void link_set_dpms_on(
+               }
+       }
++      if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
++              allocate_usb4_bandwidth(pipe_ctx->stream);
++
+       if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+               allocate_mst_payload(pipe_ctx);
+       else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
+diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+index b45fda96eaf64..8fe66c3678508 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
++++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+@@ -346,23 +346,61 @@ enum dc_status link_validate_mode_timing(
+       return DC_OK;
+ }
++/*
++ * This function calculates the bandwidth required for the stream timing
++ * and aggregates the stream bandwidth for the respective dpia link
++ *
++ * @stream: pointer to the dc_stream_state struct instance
++ * @num_streams: number of streams to be validated
++ *
++ * return: true if validation succeeded
++ */
+ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams)
+ {
+-      bool ret = true;
+-      int bw_needed[MAX_DPIA_NUM];
+-      struct dc_link *link[MAX_DPIA_NUM];
+-
+-      if (!num_streams || num_streams > MAX_DPIA_NUM)
+-              return ret;
++      int bw_needed[MAX_DPIA_NUM] = {0};
++      struct dc_link *dpia_link[MAX_DPIA_NUM] = {0};
++      int num_dpias = 0;
+       for (uint8_t i = 0; i < num_streams; ++i) {
++              if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) {
++                      /* new dpia sst stream, check whether it exceeds max dpia */
++                      if (num_dpias >= MAX_DPIA_NUM)
++                              return false;
+-              link[i] = stream[i].link;
+-              bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
+-                              dc_link_get_highest_encoding_format(link[i]));
++                      dpia_link[num_dpias] = stream[i].link;
++                      bw_needed[num_dpias] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
++                                      dc_link_get_highest_encoding_format(dpia_link[num_dpias]));
++                      num_dpias++;
++              } else if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
++                      uint8_t j = 0;
++                      /* check whether it's a known dpia link */
++                      for (; j < num_dpias; ++j) {
++                              if (dpia_link[j] == stream[i].link)
++                                      break;
++                      }
++
++                      if (j == num_dpias) {
++                              /* new dpia mst stream, check whether it exceeds max dpia */
++                              if (num_dpias >= MAX_DPIA_NUM)
++                                      return false;
++                              else {
++                                      dpia_link[j] = stream[i].link;
++                                      num_dpias++;
++                              }
++                      }
++
++                      bw_needed[j] += dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
++                              dc_link_get_highest_encoding_format(dpia_link[j]));
++              }
+       }
+-      ret = dpia_validate_usb4_bw(link, bw_needed, num_streams);
++      /* Include dp overheads */
++      for (uint8_t i = 0; i < num_dpias; ++i) {
++              int dp_overhead = 0;
++
++              dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(dpia_link[i]);
++              bw_needed[i] += dp_overhead;
++      }
+-      return ret;
++      return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias);
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+index a7aa8c9da868f..4ef1a6a1d1295 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
++++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+@@ -54,12 +54,18 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp)
+ static void reset_bw_alloc_struct(struct dc_link *link)
+ {
+       link->dpia_bw_alloc_config.bw_alloc_enabled = false;
+-      link->dpia_bw_alloc_config.sink_verified_bw = 0;
+-      link->dpia_bw_alloc_config.sink_max_bw = 0;
++      link->dpia_bw_alloc_config.link_verified_bw = 0;
++      link->dpia_bw_alloc_config.link_max_bw = 0;
++      link->dpia_bw_alloc_config.allocated_bw = 0;
+       link->dpia_bw_alloc_config.estimated_bw = 0;
+       link->dpia_bw_alloc_config.bw_granularity = 0;
++      link->dpia_bw_alloc_config.dp_overhead = 0;
+       link->dpia_bw_alloc_config.response_ready = false;
+-      link->dpia_bw_alloc_config.sink_allocated_bw = 0;
++      link->dpia_bw_alloc_config.nrd_max_lane_count = 0;
++      link->dpia_bw_alloc_config.nrd_max_link_rate = 0;
++      for (int i = 0; i < MAX_SINKS_PER_LINK; i++)
++              link->dpia_bw_alloc_config.remote_sink_req_bw[i] = 0;
++      DC_LOG_DEBUG("reset usb4 bw alloc of link(%d)\n", link->link_index);
+ }
+ #define BW_GRANULARITY_0 4 // 0.25 Gbps
+@@ -210,8 +216,8 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in
+                               link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) &&
+                               (link_dpia_secondary->hpd_status &&
+                               link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) {
+-                              total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
+-                                      link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw;
++                                      total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
++                                              link_dpia_secondary->dpia_bw_alloc_config.allocated_bw;
+                       } else if (link_dpia_primary->hpd_status &&
+                                       link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) {
+                               total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw;
+@@ -264,7 +270,7 @@ static void set_usb4_req_bw_req(struct dc_link *link, int req_bw)
+       /* Error check whether requested and allocated are equal */
+       req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+-      if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) {
++      if (req_bw == link->dpia_bw_alloc_config.allocated_bw) {
+               DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n",
+                       __func__, link->link_index);
+       }
+@@ -387,9 +393,9 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
+               DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n",
+                       __func__, link->link_index);
+               DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n",
+-                      __func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed);
++                      __func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed);
+-              link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed;
++              link->dpia_bw_alloc_config.allocated_bw = bw_needed;
+               link->dpia_bw_alloc_config.response_ready = true;
+               break;
+@@ -427,8 +433,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
+       if (link->hpd_status && peak_bw > 0) {
+               // If DP over USB4 then we need to check BW allocation
+-              link->dpia_bw_alloc_config.sink_max_bw = peak_bw;
+-              set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw);
++              link->dpia_bw_alloc_config.link_max_bw = peak_bw;
++              set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw);
+               do {
+                       if (timeout > 0)
+@@ -440,8 +446,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
+               if (!timeout)
+                       ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
+-              else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
+-                      ret = link->dpia_bw_alloc_config.sink_allocated_bw;
++              else if (link->dpia_bw_alloc_config.allocated_bw > 0)
++                      ret = link->dpia_bw_alloc_config.allocated_bw;
+       }
+       //2. Cold Unplug
+       else if (!link->hpd_status)
+@@ -450,7 +456,6 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
+ out:
+       return ret;
+ }
+-
+ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
+ {
+       bool ret = false;
+@@ -458,7 +463,7 @@ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r
+       DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n",
+               __func__, link->link_index, link->hpd_status,
+-              link->dpia_bw_alloc_config.sink_allocated_bw, req_bw);
++              link->dpia_bw_alloc_config.allocated_bw, req_bw);
+       if (!get_bw_alloc_proceed_flag(link))
+               goto out;
+@@ -523,3 +528,29 @@ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const
+       return ret;
+ }
++
++int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link)
++{
++      int dp_overhead = 0, link_mst_overhead = 0;
++
++      if (!get_bw_alloc_proceed_flag((link)))
++              return dp_overhead;
++
++      /* if it's an mst link, add MTPH overhead */
++      if ((link->type == dc_connection_mst_branch) &&
++              !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
++              /* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH
++               * MST overhead is 1/64 of link bandwidth (excluding any overhead)
++               */
++              const struct dc_link_settings *link_cap =
++                      dc_link_get_link_cap(link);
++              uint32_t link_bw_in_kbps =
++                      link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8;
++              link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0);
++      }
++
++      /* add all the overheads */
++      dp_overhead = link_mst_overhead;
++
++      return dp_overhead;
++}
+diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+index 981bc4eb6120e..3b6d8494f9d5d 100644
+--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
++++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+@@ -99,4 +99,13 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
+  */
+ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias);
++/*
++ * Obtain all the DP overheads in dp tunneling for the dpia link
++ *
++ * @link: pointer to the dc_link struct instance
++ *
++ * return: DP overheads in DP tunneling
++ */
++int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link);
++
+ #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */
+-- 
+2.43.0
+
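For reference, the MTPH overhead added above follows from 8b/10b MST framing: an MTP is 64 time slots long and slot 0 carries the MTP header, so 1/64 of the link bandwidth is lost, rounded up. A minimal sketch of the same ceiling division, with the link bandwidth as a hypothetical input:

#include <stdint.h>

/* ceil(link_bw / 64): one of 64 MTP time slots is the MTP header */
static uint32_t mtph_overhead_kbps(uint32_t link_bw_in_kbps)
{
	return link_bw_in_kbps / 64 + ((link_bw_in_kbps % 64) ? 1 : 0);
}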
diff --git a/queue-6.6/mptcp-add-currestab-mib-counter-support.patch b/queue-6.6/mptcp-add-currestab-mib-counter-support.patch
new file mode 100644 (file)
index 0000000..7fedbdb
--- /dev/null
@@ -0,0 +1,115 @@
+From 62b7728c7f2cf8fbb7c4c0fd8611b66e5ec79647 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Dec 2023 13:47:22 +0100
+Subject: mptcp: add CurrEstab MIB counter support
+
+From: Geliang Tang <geliang.tang@linux.dev>
+
+[ Upstream commit d9cd27b8cd191133e287e5de107f971136abe8a2 ]
+
+Add a new MIB counter named MPTCP_MIB_CURRESTAB to count current
+established MPTCP connections, similar to TCP_MIB_CURRESTAB. This is
+useful to quickly list the number of MPTCP connections without having to
+iterate over all of them.
+
+This patch adds a new helper function mptcp_set_state(): if the state
+switches to ESTABLISHED, this newly added counter is incremented; if it
+switches away from ESTABLISHED, it is decremented. This helper is going
+to be used in the following patch.
+
+Similar to MPTCP_INC_STATS(), a new helper called MPTCP_DEC_STATS() is
+also needed to decrement a MIB counter.
+
+Signed-off-by: Geliang Tang <geliang.tang@linux.dev>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: e4a0fa47e816 ("mptcp: corner case locking for rx path fields initialization")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/mib.c      |  1 +
+ net/mptcp/mib.h      |  8 ++++++++
+ net/mptcp/protocol.c | 18 ++++++++++++++++++
+ net/mptcp/protocol.h |  1 +
+ 4 files changed, 28 insertions(+)
+
+diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
+index a0990c365a2ea..c30405e768337 100644
+--- a/net/mptcp/mib.c
++++ b/net/mptcp/mib.c
+@@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
+       SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED),
+       SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
+       SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
++      SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
+       SNMP_MIB_SENTINEL
+ };
+diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
+index cae71d9472529..dd7fd1f246b5f 100644
+--- a/net/mptcp/mib.h
++++ b/net/mptcp/mib.h
+@@ -65,6 +65,7 @@ enum linux_mptcp_mib_field {
+                                        * conflict with another subflow while updating msk rcv wnd
+                                        */
+       MPTCP_MIB_RCVWNDCONFLICT,       /* Conflict with while updating msk rcv wnd */
++      MPTCP_MIB_CURRESTAB,            /* Current established MPTCP connections */
+       __MPTCP_MIB_MAX
+ };
+@@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net,
+               __SNMP_INC_STATS(net->mib.mptcp_statistics, field);
+ }
++static inline void MPTCP_DEC_STATS(struct net *net,
++                                 enum linux_mptcp_mib_field field)
++{
++      if (likely(net->mib.mptcp_statistics))
++              SNMP_DEC_STATS(net->mib.mptcp_statistics, field);
++}
++
+ bool mptcp_mib_alloc(struct net *net);
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 9d4d5dbdbb53b..7765514451ded 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2877,6 +2877,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
+       release_sock(ssk);
+ }
++void mptcp_set_state(struct sock *sk, int state)
++{
++      int oldstate = sk->sk_state;
++
++      switch (state) {
++      case TCP_ESTABLISHED:
++              if (oldstate != TCP_ESTABLISHED)
++                      MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
++              break;
++
++      default:
++              if (oldstate == TCP_ESTABLISHED)
++                      MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
++      }
++
++      inet_sk_state_store(sk, state);
++}
++
+ static const unsigned char new_state[16] = {
+       /* current state:     new state:      action:   */
+       [0 /* (Invalid) */] = TCP_CLOSE,
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index 094d3fd47a92f..01778ffa86be1 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -642,6 +642,7 @@ bool __mptcp_close(struct sock *sk, long timeout);
+ void mptcp_cancel_work(struct sock *sk);
+ void __mptcp_unaccepted_force_close(struct sock *sk);
+ void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
++void mptcp_set_state(struct sock *sk, int state);
+ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
+                          const struct mptcp_addr_info *b, bool use_port);
+-- 
+2.43.0
+
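For reference, MPTCP_MIB_CURRESTAB is a gauge rather than an event counter, which is why the patch pairs MPTCP_INC_STATS() with the new MPTCP_DEC_STATS(): the value must move in both directions. A minimal sketch of the transition logic in mptcp_set_state(), with the per-netns MIB plumbing reduced to a plain counter and TCP_ESTABLISHED stubbed out:

#define TCP_ESTABLISHED 1  /* placeholder for the kernel enum value */

static int curr_estab;     /* stands in for the per-netns MIB counter */

static void set_state(int *sk_state, int state)
{
	int oldstate = *sk_state;

	if (state == TCP_ESTABLISHED && oldstate != TCP_ESTABLISHED)
		curr_estab++;   /* MPTCP_INC_STATS(..., MPTCP_MIB_CURRESTAB) */
	else if (state != TCP_ESTABLISHED && oldstate == TCP_ESTABLISHED)
		curr_estab--;   /* MPTCP_DEC_STATS(..., MPTCP_MIB_CURRESTAB) */

	*sk_state = state;      /* inet_sk_state_store() in the kernel */
}

On a running system the new counter is expected to appear alongside the other MPTCP MIBs (e.g. in /proc/net/netstat); the exact exported name depends on the reading tool's prefix.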
diff --git a/queue-6.6/mptcp-corner-case-locking-for-rx-path-fields-initial.patch b/queue-6.6/mptcp-corner-case-locking-for-rx-path-fields-initial.patch
new file mode 100644 (file)
index 0000000..b0e2b5e
--- /dev/null
@@ -0,0 +1,280 @@
+From 9a619e8f8646e9b6c6061d04a4fa75017336a31c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 19:03:52 +0100
+Subject: mptcp: corner case locking for rx path fields initialization
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit e4a0fa47e816e186f6b4c0055d07eeec42d11871 ]
+
+Most MPTCP-level related fields are under the mptcp data lock
+protection, but are written one-off without such lock at MPC
+complete time, both for the client and the server
+
+Leverage the mptcp_propagate_state() infrastructure to move such
+initialization under the proper lock client-wise.
+
+The server-side critical init steps are done by
+mptcp_subflow_fully_established(): ensure the caller properly holds the
+relevant lock, and avoid acquiring the same lock in the nested scopes.
+
+There are no real potential races, as write access to such fields
+is implicitly serialized by the MPTCP state machine; the primary
+goal is consistency.
+
+Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/fastopen.c |  6 ++---
+ net/mptcp/options.c  |  9 +++----
+ net/mptcp/protocol.c |  9 ++++---
+ net/mptcp/protocol.h |  9 +++----
+ net/mptcp/subflow.c  | 56 +++++++++++++++++++++++++-------------------
+ 5 files changed, 50 insertions(+), 39 deletions(-)
+
+diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
+index 74698582a2859..ad28da655f8bc 100644
+--- a/net/mptcp/fastopen.c
++++ b/net/mptcp/fastopen.c
+@@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
+       mptcp_data_unlock(sk);
+ }
+-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+-                                 const struct mptcp_options_received *mp_opt)
++void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
++                                   const struct mptcp_options_received *mp_opt)
+ {
+       struct sock *sk = (struct sock *)msk;
+       struct sk_buff *skb;
+-      mptcp_data_lock(sk);
+       skb = skb_peek_tail(&sk->sk_receive_queue);
+       if (skb) {
+               WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
+@@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_
+       }
+       pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
+-      mptcp_data_unlock(sk);
+ }
+diff --git a/net/mptcp/options.c b/net/mptcp/options.c
+index d2527d189a799..e3e96a49f9229 100644
+--- a/net/mptcp/options.c
++++ b/net/mptcp/options.c
+@@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
+               /* subflows are fully established as soon as we get any
+                * additional ack, including ADD_ADDR.
+                */
+-              subflow->fully_established = 1;
+-              WRITE_ONCE(msk->fully_established, true);
+-              goto check_notify;
++              goto set_fully_established;
+       }
+       /* If the first established packet does not contain MP_CAPABLE + data
+@@ -986,7 +984,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
+ set_fully_established:
+       if (unlikely(!READ_ONCE(msk->pm.server_side)))
+               pr_warn_once("bogus mpc option on established client sk");
+-      mptcp_subflow_fully_established(subflow, mp_opt);
++
++      mptcp_data_lock((struct sock *)msk);
++      __mptcp_subflow_fully_established(msk, subflow, mp_opt);
++      mptcp_data_unlock((struct sock *)msk);
+ check_notify:
+       /* if the subflow is not already linked into the conn_list, we can't
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 55a90a7b7b517..d369274113108 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -3195,6 +3195,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+ {
+       struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+       struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
++      struct mptcp_subflow_context *subflow;
+       struct mptcp_sock *msk;
+       if (!nsk)
+@@ -3235,7 +3236,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+       /* The msk maintain a ref to each subflow in the connections list */
+       WRITE_ONCE(msk->first, ssk);
+-      list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
++      subflow = mptcp_subflow_ctx(ssk);
++      list_add(&subflow->node, &msk->conn_list);
+       sock_hold(ssk);
+       /* new mpc subflow takes ownership of the newly
+@@ -3250,6 +3252,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+       __mptcp_propagate_sndbuf(nsk, ssk);
+       mptcp_rcv_space_init(msk, ssk);
++
++      if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
++              __mptcp_subflow_fully_established(msk, subflow, mp_opt);
+       bh_unlock_sock(nsk);
+       /* note: the newly allocated socket refcount is 2 now */
+@@ -3525,8 +3530,6 @@ void mptcp_finish_connect(struct sock *ssk)
+        * accessing the field below
+        */
+       WRITE_ONCE(msk->local_key, subflow->local_key);
+-      WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+-      WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+       mptcp_pm_new_connection(msk, ssk, 0);
+ }
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index 01778ffa86be1..c9516882cdd4c 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -623,8 +623,9 @@ int mptcp_allow_join_id0(const struct net *net);
+ unsigned int mptcp_stale_loss_cnt(const struct net *net);
+ int mptcp_get_pm_type(const struct net *net);
+ const char *mptcp_get_scheduler(const struct net *net);
+-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+-                                   const struct mptcp_options_received *mp_opt);
++void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
++                                     struct mptcp_subflow_context *subflow,
++                                     const struct mptcp_options_received *mp_opt);
+ bool __mptcp_retransmit_pending_data(struct sock *sk);
+ void mptcp_check_and_set_pending(struct sock *sk);
+ void __mptcp_push_pending(struct sock *sk, unsigned int flags);
+@@ -938,8 +939,8 @@ void mptcp_event_pm_listener(const struct sock *ssk,
+                            enum mptcp_event_type event);
+ bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
+-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+-                                 const struct mptcp_options_received *mp_opt);
++void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
++                                   const struct mptcp_options_received *mp_opt);
+ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
+                                             struct request_sock *req);
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index ba739e7009221..43d6ee4328141 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -441,20 +441,6 @@ void __mptcp_sync_state(struct sock *sk, int state)
+       }
+ }
+-static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
+-{
+-      struct mptcp_sock *msk = mptcp_sk(sk);
+-
+-      mptcp_data_lock(sk);
+-      if (!sock_owned_by_user(sk)) {
+-              __mptcp_sync_state(sk, ssk->sk_state);
+-      } else {
+-              msk->pending_state = ssk->sk_state;
+-              __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+-      }
+-      mptcp_data_unlock(sk);
+-}
+-
+ static void subflow_set_remote_key(struct mptcp_sock *msk,
+                                  struct mptcp_subflow_context *subflow,
+                                  const struct mptcp_options_received *mp_opt)
+@@ -476,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
+       atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
+ }
++static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
++                                struct mptcp_subflow_context *subflow,
++                                const struct mptcp_options_received *mp_opt)
++{
++      struct mptcp_sock *msk = mptcp_sk(sk);
++
++      mptcp_data_lock(sk);
++      if (mp_opt) {
++              /* Options are available only in the non fallback cases
++               * avoid updating rx path fields otherwise
++               */
++              WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
++              WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
++              subflow_set_remote_key(msk, subflow, mp_opt);
++      }
++
++      if (!sock_owned_by_user(sk)) {
++              __mptcp_sync_state(sk, ssk->sk_state);
++      } else {
++              msk->pending_state = ssk->sk_state;
++              __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
++      }
++      mptcp_data_unlock(sk);
++}
++
+ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
+ {
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+@@ -510,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
+               if (mp_opt.deny_join_id0)
+                       WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+               subflow->mp_capable = 1;
+-              subflow_set_remote_key(msk, subflow, &mp_opt);
+               MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
+               mptcp_finish_connect(sk);
+-              mptcp_propagate_state(parent, sk);
++              mptcp_propagate_state(parent, sk, subflow, &mp_opt);
+       } else if (subflow->request_join) {
+               u8 hmac[SHA256_DIGEST_SIZE];
+@@ -556,7 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
+               }
+       } else if (mptcp_check_fallback(sk)) {
+ fallback:
+-              mptcp_propagate_state(parent, sk);
++              mptcp_propagate_state(parent, sk, subflow, NULL);
+       }
+       return;
+@@ -741,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
+       kfree_rcu(ctx, rcu);
+ }
+-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+-                                   const struct mptcp_options_received *mp_opt)
++void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
++                                     struct mptcp_subflow_context *subflow,
++                                     const struct mptcp_options_received *mp_opt)
+ {
+-      struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+-
+       subflow_set_remote_key(msk, subflow, mp_opt);
+       subflow->fully_established = 1;
+       WRITE_ONCE(msk->fully_established, true);
+       if (subflow->is_mptfo)
+-              mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
++              __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
+ }
+ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
+@@ -844,7 +853,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
+                        * mpc option
+                        */
+                       if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
+-                              mptcp_subflow_fully_established(ctx, &mp_opt);
+                               mptcp_pm_fully_established(owner, child);
+                               ctx->pm_notified = 1;
+                       }
+@@ -1748,7 +1756,7 @@ static void subflow_state_change(struct sock *sk)
+               mptcp_do_fallback(sk);
+               pr_fallback(msk);
+               subflow->conn_finished = 1;
+-              mptcp_propagate_state(parent, sk);
++              mptcp_propagate_state(parent, sk, subflow, NULL);
+       }
+       /* as recvmsg() does not acquire the subflow socket for ssk selection
+-- 
+2.43.0
+
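For reference, the renames above (mptcp_fastopen_gen_msk_ackseq() becoming __mptcp_fastopen_gen_msk_ackseq(), and so on) follow the usual kernel convention that a double-underscore prefix marks a variant whose caller already holds the relevant lock; the MPC-ack path can then take the msk data lock once instead of re-acquiring it in nested helpers. A minimal userspace sketch of the pattern, with illustrative names and a pthread mutex standing in for mptcp_data_lock():

#include <pthread.h>

struct msk {
	pthread_mutex_t data_lock;  /* stands in for the msk data lock */
	int rx_field;
};

/* double-underscore variant: caller must already hold data_lock */
static void __update_rx_fields(struct msk *m, int v)
{
	m->rx_field = v;
}

/* locked variant: acquires and releases the lock exactly once */
static void update_rx_fields(struct msk *m, int v)
{
	pthread_mutex_lock(&m->data_lock);
	__update_rx_fields(m, v);
	pthread_mutex_unlock(&m->data_lock);
}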
diff --git a/queue-6.6/mptcp-fix-more-tx-path-fields-initialization.patch b/queue-6.6/mptcp-fix-more-tx-path-fields-initialization.patch
new file mode 100644 (file)
index 0000000..d204651
--- /dev/null
@@ -0,0 +1,79 @@
+From c4e9f078d87de7990431384ed15d7a148fd2cd85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 19:03:51 +0100
+Subject: mptcp: fix more tx path fields initialization
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 3f83d8a77eeeb47011b990fd766a421ee64f1d73 ]
+
+The 'msk->write_seq' and 'msk->snd_nxt' are always updated under
+the msk socket lock, except at MPC handshake completion time.
+
+Builds on the previous commit to move such init under the relevant
+lock.
+
+There are no known problems caused by the potential race; the
+primary goal is consistency.
+
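+In condensed form (illustrative only; see the hunks below for the
+actual change):
+
+  /* mptcp_finish_connect(): no longer touches write_seq/snd_nxt */
+  WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+  WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+
+  /* __mptcp_sync_state(): runs under the msk socket lock */
+  if (sk->sk_state == TCP_SYN_SENT) {
+          WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+          WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+  }
+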
+Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: e4a0fa47e816 ("mptcp: corner case locking for rx path fields initialization")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c |  6 ++----
+ net/mptcp/subflow.c  | 13 +++++++++++--
+ 2 files changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 2f794924ae5d4..55a90a7b7b517 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -3525,10 +3525,8 @@ void mptcp_finish_connect(struct sock *ssk)
+        * accessing the field below
+        */
+       WRITE_ONCE(msk->local_key, subflow->local_key);
+-      WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+-      WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+-      WRITE_ONCE(msk->snd_una, msk->write_seq);
+-      WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
++      WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
++      WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+       mptcp_pm_new_connection(msk, ssk, 0);
+ }
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 15f456fb28977..ba739e7009221 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -421,12 +421,21 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
+ void __mptcp_sync_state(struct sock *sk, int state)
+ {
++      struct mptcp_subflow_context *subflow;
+       struct mptcp_sock *msk = mptcp_sk(sk);
++      struct sock *ssk = msk->first;
+-      __mptcp_propagate_sndbuf(sk, msk->first);
++      subflow = mptcp_subflow_ctx(ssk);
++      __mptcp_propagate_sndbuf(sk, ssk);
+       if (!msk->rcvspace_init)
+-              mptcp_rcv_space_init(msk, msk->first);
++              mptcp_rcv_space_init(msk, ssk);
++
+       if (sk->sk_state == TCP_SYN_SENT) {
++              /* subflow->idsn is always available in TCP_SYN_SENT state,
++               * even for the FASTOPEN scenarios
++               */
++              WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
++              WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+               mptcp_set_state(sk, state);
+               sk->sk_state_change(sk);
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/mptcp-use-mptcp_set_state.patch b/queue-6.6/mptcp-use-mptcp_set_state.patch
new file mode 100644 (file)
index 0000000..a18234e
--- /dev/null
@@ -0,0 +1,217 @@
+From 8cf052a72d794c3afa841e0261d58273ee6a83d1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Dec 2023 13:47:23 +0100
+Subject: mptcp: use mptcp_set_state
+
+From: Geliang Tang <geliang.tang@linux.dev>
+
+[ Upstream commit c693a8516429908da3ea111b0caa3c042ab1e6e9 ]
+
+This patch replaces all the 'inet_sk_state_store()' calls under net/mptcp
+with the new helper mptcp_set_state().
+
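+The conversion itself is mechanical, e.g. (taken from the protocol.c
+hunks below):
+
+  /* before */ inet_sk_state_store(sk, TCP_CLOSE);
+  /* after  */ mptcp_set_state(sk, TCP_CLOSE);
+
+The one spot left untouched is the listening socket created in
+pm_netlink.c, where the msk socket lock is not held; a comment is
+added there to explain why inet_sk_state_store() is kept.
+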
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/460
+Signed-off-by: Geliang Tang <geliang.tang@linux.dev>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: e4a0fa47e816 ("mptcp: corner case locking for rx path fields initialization")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c |  5 +++++
+ net/mptcp/protocol.c   | 38 +++++++++++++++++++-------------------
+ net/mptcp/subflow.c    |  2 +-
+ 3 files changed, 25 insertions(+), 20 deletions(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 3011bc378462b..44c0e96210a46 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -1048,6 +1048,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
+       if (err)
+               return err;
++      /* We don't use mptcp_set_state() here because it needs to be called
++       * under the msk socket lock. For the moment, that will not bring
++       * anything more than only calling inet_sk_state_store(), because the
++       * old status is known (TCP_CLOSE).
++       */
+       inet_sk_state_store(newsk, TCP_LISTEN);
+       lock_sock(ssk);
+       err = __inet_listen_sk(ssk, backlog);
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 7765514451ded..2f794924ae5d4 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -445,11 +445,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
+               switch (sk->sk_state) {
+               case TCP_FIN_WAIT1:
+-                      inet_sk_state_store(sk, TCP_FIN_WAIT2);
++                      mptcp_set_state(sk, TCP_FIN_WAIT2);
+                       break;
+               case TCP_CLOSING:
+               case TCP_LAST_ACK:
+-                      inet_sk_state_store(sk, TCP_CLOSE);
++                      mptcp_set_state(sk, TCP_CLOSE);
+                       break;
+               }
+@@ -610,13 +610,13 @@ static bool mptcp_check_data_fin(struct sock *sk)
+               switch (sk->sk_state) {
+               case TCP_ESTABLISHED:
+-                      inet_sk_state_store(sk, TCP_CLOSE_WAIT);
++                      mptcp_set_state(sk, TCP_CLOSE_WAIT);
+                       break;
+               case TCP_FIN_WAIT1:
+-                      inet_sk_state_store(sk, TCP_CLOSING);
++                      mptcp_set_state(sk, TCP_CLOSING);
+                       break;
+               case TCP_FIN_WAIT2:
+-                      inet_sk_state_store(sk, TCP_CLOSE);
++                      mptcp_set_state(sk, TCP_CLOSE);
+                       break;
+               default:
+                       /* Other states not expected */
+@@ -791,7 +791,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+        */
+       ssk_state = inet_sk_state_load(ssk);
+       if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+-              inet_sk_state_store(sk, ssk_state);
++              mptcp_set_state(sk, ssk_state);
+       WRITE_ONCE(sk->sk_err, -err);
+       /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+@@ -2470,7 +2470,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+           inet_sk_state_load(msk->first) == TCP_CLOSE) {
+               if (sk->sk_state != TCP_ESTABLISHED ||
+                   msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
+-                      inet_sk_state_store(sk, TCP_CLOSE);
++                      mptcp_set_state(sk, TCP_CLOSE);
+                       mptcp_close_wake_up(sk);
+               } else {
+                       mptcp_start_tout_timer(sk);
+@@ -2565,7 +2565,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
+               WRITE_ONCE(sk->sk_err, ECONNRESET);
+       }
+-      inet_sk_state_store(sk, TCP_CLOSE);
++      mptcp_set_state(sk, TCP_CLOSE);
+       WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
+       smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
+       set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
+@@ -2700,7 +2700,7 @@ static void mptcp_do_fastclose(struct sock *sk)
+       struct mptcp_subflow_context *subflow, *tmp;
+       struct mptcp_sock *msk = mptcp_sk(sk);
+-      inet_sk_state_store(sk, TCP_CLOSE);
++      mptcp_set_state(sk, TCP_CLOSE);
+       mptcp_for_each_subflow_safe(msk, subflow, tmp)
+               __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
+                                 subflow, MPTCP_CF_FASTCLOSE);
+@@ -2917,7 +2917,7 @@ static int mptcp_close_state(struct sock *sk)
+       int next = (int)new_state[sk->sk_state];
+       int ns = next & TCP_STATE_MASK;
+-      inet_sk_state_store(sk, ns);
++      mptcp_set_state(sk, ns);
+       return next & TCP_ACTION_FIN;
+ }
+@@ -3035,7 +3035,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
+       if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
+               mptcp_check_listen_stop(sk);
+-              inet_sk_state_store(sk, TCP_CLOSE);
++              mptcp_set_state(sk, TCP_CLOSE);
+               goto cleanup;
+       }
+@@ -3078,7 +3078,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
+        * state, let's not keep resources busy for no reasons
+        */
+       if (subflows_alive == 0)
+-              inet_sk_state_store(sk, TCP_CLOSE);
++              mptcp_set_state(sk, TCP_CLOSE);
+       sock_hold(sk);
+       pr_debug("msk=%p state=%d", sk, sk->sk_state);
+@@ -3144,7 +3144,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
+               return -EBUSY;
+       mptcp_check_listen_stop(sk);
+-      inet_sk_state_store(sk, TCP_CLOSE);
++      mptcp_set_state(sk, TCP_CLOSE);
+       mptcp_stop_rtx_timer(sk);
+       mptcp_stop_tout_timer(sk);
+@@ -3231,7 +3231,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
+       /* this can't race with mptcp_close(), as the msk is
+        * not yet exposed to user-space
+        */
+-      inet_sk_state_store(nsk, TCP_ESTABLISHED);
++      mptcp_set_state(nsk, TCP_ESTABLISHED);
+       /* The msk maintain a ref to each subflow in the connections list */
+       WRITE_ONCE(msk->first, ssk);
+@@ -3686,7 +3686,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+       if (IS_ERR(ssk))
+               return PTR_ERR(ssk);
+-      inet_sk_state_store(sk, TCP_SYN_SENT);
++      mptcp_set_state(sk, TCP_SYN_SENT);
+       subflow = mptcp_subflow_ctx(ssk);
+ #ifdef CONFIG_TCP_MD5SIG
+       /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
+@@ -3736,7 +3736,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+       if (unlikely(err)) {
+               /* avoid leaving a dangling token in an unconnected socket */
+               mptcp_token_destroy(msk);
+-              inet_sk_state_store(sk, TCP_CLOSE);
++              mptcp_set_state(sk, TCP_CLOSE);
+               return err;
+       }
+@@ -3826,13 +3826,13 @@ static int mptcp_listen(struct socket *sock, int backlog)
+               goto unlock;
+       }
+-      inet_sk_state_store(sk, TCP_LISTEN);
++      mptcp_set_state(sk, TCP_LISTEN);
+       sock_set_flag(sk, SOCK_RCU_FREE);
+       lock_sock(ssk);
+       err = __inet_listen_sk(ssk, backlog);
+       release_sock(ssk);
+-      inet_sk_state_store(sk, inet_sk_state_load(ssk));
++      mptcp_set_state(sk, inet_sk_state_load(ssk));
+       if (!err) {
+               sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+@@ -3892,7 +3892,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
+                       __mptcp_close_ssk(newsk, msk->first,
+                                         mptcp_subflow_ctx(msk->first), 0);
+                       if (unlikely(list_is_singular(&msk->conn_list)))
+-                              inet_sk_state_store(newsk, TCP_CLOSE);
++                              mptcp_set_state(newsk, TCP_CLOSE);
+               }
+       }
+       release_sock(newsk);
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 8c7e22a9a37bd..15f456fb28977 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -427,7 +427,7 @@ void __mptcp_sync_state(struct sock *sk, int state)
+       if (!msk->rcvspace_init)
+               mptcp_rcv_space_init(msk, msk->first);
+       if (sk->sk_state == TCP_SYN_SENT) {
+-              inet_sk_state_store(sk, state);
++              mptcp_set_state(sk, state);
+               sk->sk_state_change(sk);
+       }
+ }
+-- 
+2.43.0
+
index f29a628b3019fb7bd2c463f48c3b0ffc47ba9bf7..f8fc9e6e608760c8741dab77afd5df0fb73a85bd 100644 (file)
@@ -118,3 +118,16 @@ virtio-blk-ensure-no-requests-in-virtqueues-before-d.patch
 cifs-change-tcon-status-when-need_reconnect-is-set-o.patch
 cifs-handle-cases-where-multiple-sessions-share-conn.patch
 smb3-clarify-mount-warning.patch
+mptcp-add-currestab-mib-counter-support.patch
+mptcp-use-mptcp_set_state.patch
+mptcp-fix-more-tx-path-fields-initialization.patch
+mptcp-corner-case-locking-for-rx-path-fields-initial.patch
+drm-amd-display-add-dpia-display-mode-validation-log.patch
+drm-amd-display-request-usb4-bw-for-mst-streams.patch
+drm-amd-display-fixed-integer-types-and-null-check-l.patch
+xen-evtchn-allow-shared-registration-of-irq-handers.patch
+xen-events-reduce-externally-visible-helper-function.patch
+xen-events-remove-some-simple-helpers-from-events_ba.patch
+xen-events-drop-xen_allocate_irqs_dynamic.patch
+xen-events-modify-internal-un-bind-interfaces.patch
+xen-events-close-evtchn-after-mapping-cleanup.patch
diff --git a/queue-6.6/xen-events-close-evtchn-after-mapping-cleanup.patch b/queue-6.6/xen-events-close-evtchn-after-mapping-cleanup.patch
new file mode 100644 (file)
index 0000000..68a67d2
--- /dev/null
@@ -0,0 +1,171 @@
+From fe5396afd11d8a5bab88de130fd203974520e953 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jan 2024 16:31:28 +0000
+Subject: xen/events: close evtchn after mapping cleanup
+
+From: Maximilian Heyne <mheyne@amazon.de>
+
+[ Upstream commit fa765c4b4aed2d64266b694520ecb025c862c5a9 ]
+
+shutdown_pirq and startup_pirq are not taking the
+irq_mapping_update_lock because they can't due to lock inversion. Both
+are called with the irq_desc->lock being taken. The lock order,
+however, is first irq_mapping_update_lock and then irq_desc->lock.
+
+This opens multiple races:
+- shutdown_pirq can be interrupted by a function that allocates an event
+  channel:
+
+  CPU0                        CPU1
+  shutdown_pirq {
+    xen_evtchn_close(e)
+                              __startup_pirq {
+                                EVTCHNOP_bind_pirq
+                                  -> returns just freed evtchn e
+                                set_evtchn_to_irq(e, irq)
+                              }
+    xen_irq_info_cleanup() {
+      set_evtchn_to_irq(e, -1)
+    }
+  }
+
+  Assume here that event channel e refers to the same event channel
+  number.
+  After this race the evtchn_to_irq mapping for e is invalid (-1).
+
+- __startup_pirq races with __unbind_from_irq in a similar way. Because
+  __startup_pirq doesn't take irq_mapping_update_lock it can grab the
+  evtchn that __unbind_from_irq is currently freeing and cleaning up. In
+  this case even though the event channel is allocated, its mapping can
+  be unset in evtchn_to_irq.
+
+The fix is to first clean up the mappings and then close the event
+channel. In this way, when an event channel gets allocated, its
+potential previous evtchn_to_irq mappings are guaranteed to be unset
+already. This is also the reverse of the allocation order, where first
+the event channel is allocated and then the mappings are set up.
+
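+In shutdown_pirq this amounts to swapping two calls (condensed from
+the hunk below):
+
+  do_mask(info, EVT_MASK_REASON_EXPLICIT);
+  xen_irq_info_cleanup(info);  /* unset the evtchn_to_irq mapping */
+  xen_evtchn_close(evtchn);    /* only then free the event channel */
+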
+On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
+[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
+devices. The issue is that during nvme_setup_io_queues, pci_free_irq
+is called for every device, which results in a call to shutdown_pirq.
+With many NVMe devices it's therefore likely to hit this race during
+boot because multiple calls to shutdown_pirq and startup_pirq will
+potentially be running in parallel.
+
+  ------------[ cut here ]------------
+  blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
+  kernel BUG at drivers/xen/events/events_base.c:499!
+  invalid opcode: 0000 [#1] SMP PTI
+  CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
+  Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
+  Workqueue: nvme-reset-wq nvme_reset_work
+  RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
+  Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
+  RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
+  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
+  RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
+  RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
+  R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
+  R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
+  FS:  0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+   ? show_trace_log_lvl+0x1c1/0x2d9
+   ? show_trace_log_lvl+0x1c1/0x2d9
+   ? set_affinity_irq+0xdc/0x1c0
+   ? __die_body.cold+0x8/0xd
+   ? die+0x2b/0x50
+   ? do_trap+0x90/0x110
+   ? bind_evtchn_to_cpu+0xdf/0xf0
+   ? do_error_trap+0x65/0x80
+   ? bind_evtchn_to_cpu+0xdf/0xf0
+   ? exc_invalid_op+0x4e/0x70
+   ? bind_evtchn_to_cpu+0xdf/0xf0
+   ? asm_exc_invalid_op+0x12/0x20
+   ? bind_evtchn_to_cpu+0xdf/0xf0
+   ? bind_evtchn_to_cpu+0xc5/0xf0
+   set_affinity_irq+0xdc/0x1c0
+   irq_do_set_affinity+0x1d7/0x1f0
+   irq_setup_affinity+0xd6/0x1a0
+   irq_startup+0x8a/0xf0
+   __setup_irq+0x639/0x6d0
+   ? nvme_suspend+0x150/0x150
+   request_threaded_irq+0x10c/0x180
+   ? nvme_suspend+0x150/0x150
+   pci_request_irq+0xa8/0xf0
+   ? __blk_mq_free_request+0x74/0xa0
+   queue_request_irq+0x6f/0x80
+   nvme_create_queue+0x1af/0x200
+   nvme_create_io_queues+0xbd/0xf0
+   nvme_setup_io_queues+0x246/0x320
+   ? nvme_irq_check+0x30/0x30
+   nvme_reset_work+0x1c8/0x400
+   process_one_work+0x1b0/0x350
+   worker_thread+0x49/0x310
+   ? process_one_work+0x350/0x350
+   kthread+0x11b/0x140
+   ? __kthread_bind_mask+0x60/0x60
+   ret_from_fork+0x22/0x30
+  Modules linked in:
+  ---[ end trace a11715de1eee1873 ]---
+
+Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
+Cc: stable@vger.kernel.org
+Co-debugged-by: Andrew Panyakin <apanyaki@amazon.com>
+Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_base.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index 6f57ef78f5507..36ba3ef6ef01e 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -908,8 +908,8 @@ static void shutdown_pirq(struct irq_data *data)
+               return;
+       do_mask(info, EVT_MASK_REASON_EXPLICIT);
+-      xen_evtchn_close(evtchn);
+       xen_irq_info_cleanup(info);
++      xen_evtchn_close(evtchn);
+ }
+ static void enable_pirq(struct irq_data *data)
+@@ -941,6 +941,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
+ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
+ {
+       evtchn_port_t evtchn;
++      bool close_evtchn = false;
+       if (!info) {
+               xen_irq_free_desc(irq);
+@@ -960,7 +961,7 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
+               struct xenbus_device *dev;
+               if (!info->is_static)
+-                      xen_evtchn_close(evtchn);
++                      close_evtchn = true;
+               switch (info->type) {
+               case IRQT_VIRQ:
+@@ -980,6 +981,9 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
+               }
+               xen_irq_info_cleanup(info);
++
++              if (close_evtchn)
++                      xen_evtchn_close(evtchn);
+       }
+       xen_free_irq(info);
+-- 
+2.43.0
+
diff --git a/queue-6.6/xen-events-drop-xen_allocate_irqs_dynamic.patch b/queue-6.6/xen-events-drop-xen_allocate_irqs_dynamic.patch
new file mode 100644 (file)
index 0000000..5fdcf7a
--- /dev/null
@@ -0,0 +1,188 @@
+From 1c37d9d8b5ba3045652ec98bccca52987c81bf5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Sep 2023 10:29:02 +0200
+Subject: xen/events: drop xen_allocate_irqs_dynamic()
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 5dd9ad32d7758b1a76742f394acf0eb3ac8a636a ]
+
+Instead of having a common function for allocating a single IRQ or a
+consecutive number of IRQs, split up the functionality into the callers
+of xen_allocate_irqs_dynamic().
+
+This allows any allocation error in xen_irq_init() to be handled
+gracefully instead of panicking the system. Let xen_irq_init() return
+the irq_info pointer or NULL in case of an allocation error.
+
+Additionally, set the IRQ in irq_info already at allocation time, as
+otherwise the IRQ would be '0' (which is a valid IRQ number) until it
+is set.
+
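+Callers of xen_irq_init() then follow a common pattern (condensed
+from the hunks below):
+
+  info = xen_irq_init(irq);
+  if (!info) {
+          xen_irq_free_desc(irq);
+          irq = -1;
+  }
+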
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_base.c | 74 +++++++++++++++++++-------------
+ 1 file changed, 44 insertions(+), 30 deletions(-)
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index d3d7501628381..4dfd68382465b 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -304,6 +304,13 @@ static void channels_on_cpu_inc(struct irq_info *info)
+       info->is_accounted = 1;
+ }
++static void xen_irq_free_desc(unsigned int irq)
++{
++      /* Legacy IRQ descriptors are managed by the arch. */
++      if (irq >= nr_legacy_irqs())
++              irq_free_desc(irq);
++}
++
+ static void delayed_free_irq(struct work_struct *work)
+ {
+       struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
+@@ -315,9 +322,7 @@ static void delayed_free_irq(struct work_struct *work)
+       kfree(info);
+-      /* Legacy IRQ descriptors are managed by the arch. */
+-      if (irq >= nr_legacy_irqs())
+-              irq_free_desc(irq);
++      xen_irq_free_desc(irq);
+ }
+ /* Constructors for packed IRQ information. */
+@@ -332,7 +337,6 @@ static int xen_irq_info_common_setup(struct irq_info *info,
+       BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
+       info->type = type;
+-      info->irq = irq;
+       info->evtchn = evtchn;
+       info->cpu = cpu;
+       info->mask_reason = EVT_MASK_REASON_EXPLICIT;
+@@ -733,47 +737,45 @@ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
+ }
+ EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
+-static void xen_irq_init(unsigned irq)
++static struct irq_info *xen_irq_init(unsigned int irq)
+ {
+       struct irq_info *info;
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+-      if (info == NULL)
+-              panic("Unable to allocate metadata for IRQ%d\n", irq);
++      if (info) {
++              info->irq = irq;
++              info->type = IRQT_UNBOUND;
++              info->refcnt = -1;
++              INIT_RCU_WORK(&info->rwork, delayed_free_irq);
+-      info->type = IRQT_UNBOUND;
+-      info->refcnt = -1;
+-      INIT_RCU_WORK(&info->rwork, delayed_free_irq);
++              set_info_for_irq(irq, info);
++              /*
++               * Interrupt affinity setting can be immediate. No point
++               * in delaying it until an interrupt is handled.
++               */
++              irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+-      set_info_for_irq(irq, info);
+-      /*
+-       * Interrupt affinity setting can be immediate. No point
+-       * in delaying it until an interrupt is handled.
+-       */
+-      irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
++              INIT_LIST_HEAD(&info->eoi_list);
++              list_add_tail(&info->list, &xen_irq_list_head);
++      }
+-      INIT_LIST_HEAD(&info->eoi_list);
+-      list_add_tail(&info->list, &xen_irq_list_head);
++      return info;
+ }
+-static int __must_check xen_allocate_irqs_dynamic(int nvec)
++static inline int __must_check xen_allocate_irq_dynamic(void)
+ {
+-      int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
++      int irq = irq_alloc_desc_from(0, -1);
+       if (irq >= 0) {
+-              for (i = 0; i < nvec; i++)
+-                      xen_irq_init(irq + i);
++              if (!xen_irq_init(irq)) {
++                      xen_irq_free_desc(irq);
++                      irq = -1;
++              }
+       }
+       return irq;
+ }
+-static inline int __must_check xen_allocate_irq_dynamic(void)
+-{
+-
+-      return xen_allocate_irqs_dynamic(1);
+-}
+-
+ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
+ {
+       int irq;
+@@ -793,7 +795,10 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
+       else
+               irq = irq_alloc_desc_at(gsi, -1);
+-      xen_irq_init(irq);
++      if (!xen_irq_init(irq)) {
++              xen_irq_free_desc(irq);
++              irq = -1;
++      }
+       return irq;
+ }
+@@ -963,6 +968,11 @@ static void __unbind_from_irq(unsigned int irq)
+       evtchn_port_t evtchn = evtchn_from_irq(irq);
+       struct irq_info *info = info_for_irq(irq);
++      if (!info) {
++              xen_irq_free_desc(irq);
++              return;
++      }
++
+       if (info->refcnt > 0) {
+               info->refcnt--;
+               if (info->refcnt != 0)
+@@ -1101,11 +1111,14 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+       mutex_lock(&irq_mapping_update_lock);
+-      irq = xen_allocate_irqs_dynamic(nvec);
++      irq = irq_alloc_descs(-1, 0, nvec, -1);
+       if (irq < 0)
+               goto out;
+       for (i = 0; i < nvec; i++) {
++              if (!xen_irq_init(irq + i))
++                      goto error_irq;
++
+               irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
+               ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
+@@ -1753,6 +1766,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
+          so there should be a proper type */
+       BUG_ON(info->type == IRQT_UNBOUND);
++      info->irq = irq;
+       (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
+       mutex_unlock(&irq_mapping_update_lock);
+-- 
+2.43.0
+
diff --git a/queue-6.6/xen-events-modify-internal-un-bind-interfaces.patch b/queue-6.6/xen-events-modify-internal-un-bind-interfaces.patch
new file mode 100644 (file)
index 0000000..b566a4d
--- /dev/null
@@ -0,0 +1,695 @@
+From c14a0c2a671788360660b60d5becf2ca1ac54c9a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Sep 2023 09:09:52 +0200
+Subject: xen/events: modify internal [un]bind interfaces
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 3fcdaf3d7634338c3f5cbfa7451eb0b6b0024844 ]
+
+Modify the internal bind and unbind interfaces to take a struct
+irq_info parameter. When allocating a new IRQ, pass the pointer from
+the allocating function further up.
+
+This will reduce the number of info_for_irq() calls and make the code
+more efficient.
+
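+For example, the IPI setup helper changes as follows (condensed from
+the hunk below), with the IRQ number now taken from info->irq inside
+the helper:
+
+  /* old */ xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
+  /* new */ xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
+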
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_base.c | 259 +++++++++++++++----------------
+ 1 file changed, 124 insertions(+), 135 deletions(-)
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index 4dfd68382465b..6f57ef78f5507 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -327,7 +327,6 @@ static void delayed_free_irq(struct work_struct *work)
+ /* Constructors for packed IRQ information. */
+ static int xen_irq_info_common_setup(struct irq_info *info,
+-                                   unsigned irq,
+                                    enum xen_irq_type type,
+                                    evtchn_port_t evtchn,
+                                    unsigned short cpu)
+@@ -342,23 +341,22 @@ static int xen_irq_info_common_setup(struct irq_info *info,
+       info->mask_reason = EVT_MASK_REASON_EXPLICIT;
+       raw_spin_lock_init(&info->lock);
+-      ret = set_evtchn_to_irq(evtchn, irq);
++      ret = set_evtchn_to_irq(evtchn, info->irq);
+       if (ret < 0)
+               return ret;
+-      irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
++      irq_clear_status_flags(info->irq, IRQ_NOREQUEST | IRQ_NOAUTOEN);
+       return xen_evtchn_port_setup(evtchn);
+ }
+-static int xen_irq_info_evtchn_setup(unsigned irq,
++static int xen_irq_info_evtchn_setup(struct irq_info *info,
+                                    evtchn_port_t evtchn,
+                                    struct xenbus_device *dev)
+ {
+-      struct irq_info *info = info_for_irq(irq);
+       int ret;
+-      ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
++      ret = xen_irq_info_common_setup(info, IRQT_EVTCHN, evtchn, 0);
+       info->u.interdomain = dev;
+       if (dev)
+               atomic_inc(&dev->event_channels);
+@@ -366,50 +364,37 @@ static int xen_irq_info_evtchn_setup(unsigned irq,
+       return ret;
+ }
+-static int xen_irq_info_ipi_setup(unsigned cpu,
+-                                unsigned irq,
+-                                evtchn_port_t evtchn,
+-                                enum ipi_vector ipi)
++static int xen_irq_info_ipi_setup(struct irq_info *info, unsigned int cpu,
++                                evtchn_port_t evtchn, enum ipi_vector ipi)
+ {
+-      struct irq_info *info = info_for_irq(irq);
+-
+       info->u.ipi = ipi;
+-      per_cpu(ipi_to_irq, cpu)[ipi] = irq;
++      per_cpu(ipi_to_irq, cpu)[ipi] = info->irq;
+       per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn;
+-      return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
++      return xen_irq_info_common_setup(info, IRQT_IPI, evtchn, 0);
+ }
+-static int xen_irq_info_virq_setup(unsigned cpu,
+-                                 unsigned irq,
+-                                 evtchn_port_t evtchn,
+-                                 unsigned virq)
++static int xen_irq_info_virq_setup(struct irq_info *info, unsigned int cpu,
++                                 evtchn_port_t evtchn, unsigned int virq)
+ {
+-      struct irq_info *info = info_for_irq(irq);
+-
+       info->u.virq = virq;
+-      per_cpu(virq_to_irq, cpu)[virq] = irq;
++      per_cpu(virq_to_irq, cpu)[virq] = info->irq;
+-      return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
++      return xen_irq_info_common_setup(info, IRQT_VIRQ, evtchn, 0);
+ }
+-static int xen_irq_info_pirq_setup(unsigned irq,
+-                                 evtchn_port_t evtchn,
+-                                 unsigned pirq,
+-                                 unsigned gsi,
+-                                 uint16_t domid,
+-                                 unsigned char flags)
++static int xen_irq_info_pirq_setup(struct irq_info *info, evtchn_port_t evtchn,
++                                 unsigned int pirq, unsigned int gsi,
++                                 uint16_t domid, unsigned char flags)
+ {
+-      struct irq_info *info = info_for_irq(irq);
+-
+       info->u.pirq.pirq = pirq;
+       info->u.pirq.gsi = gsi;
+       info->u.pirq.domid = domid;
+       info->u.pirq.flags = flags;
+-      return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
++      return xen_irq_info_common_setup(info, IRQT_PIRQ, evtchn, 0);
+ }
+ static void xen_irq_info_cleanup(struct irq_info *info)
+@@ -453,20 +438,16 @@ int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
+       return irq;
+ }
+-static enum ipi_vector ipi_from_irq(unsigned irq)
++static enum ipi_vector ipi_from_irq(struct irq_info *info)
+ {
+-      struct irq_info *info = info_for_irq(irq);
+-
+       BUG_ON(info == NULL);
+       BUG_ON(info->type != IRQT_IPI);
+       return info->u.ipi;
+ }
+-static unsigned virq_from_irq(unsigned irq)
++static unsigned int virq_from_irq(struct irq_info *info)
+ {
+-      struct irq_info *info = info_for_irq(irq);
+-
+       BUG_ON(info == NULL);
+       BUG_ON(info->type != IRQT_VIRQ);
+@@ -533,13 +514,9 @@ static bool pirq_needs_eoi_flag(unsigned irq)
+       return info->u.pirq.flags & PIRQ_NEEDS_EOI;
+ }
+-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
++static void bind_evtchn_to_cpu(struct irq_info *info, unsigned int cpu,
+                              bool force_affinity)
+ {
+-      struct irq_info *info = evtchn_to_info(evtchn);
+-
+-      BUG_ON(info == NULL);
+-
+       if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+               struct irq_data *data = irq_get_irq_data(info->irq);
+@@ -547,7 +524,7 @@ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+               irq_data_update_effective_affinity(data, cpumask_of(cpu));
+       }
+-      xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
++      xen_evtchn_port_bind_to_cpu(info->evtchn, cpu, info->cpu);
+       channels_on_cpu_dec(info);
+       info->cpu = cpu;
+@@ -762,23 +739,24 @@ static struct irq_info *xen_irq_init(unsigned int irq)
+       return info;
+ }
+-static inline int __must_check xen_allocate_irq_dynamic(void)
++static struct irq_info *xen_allocate_irq_dynamic(void)
+ {
+       int irq = irq_alloc_desc_from(0, -1);
++      struct irq_info *info = NULL;
+       if (irq >= 0) {
+-              if (!xen_irq_init(irq)) {
++              info = xen_irq_init(irq);
++              if (!info)
+                       xen_irq_free_desc(irq);
+-                      irq = -1;
+-              }
+       }
+-      return irq;
++      return info;
+ }
+-static int __must_check xen_allocate_irq_gsi(unsigned gsi)
++static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi)
+ {
+       int irq;
++      struct irq_info *info;
+       /*
+        * A PV guest has no concept of a GSI (since it has no ACPI
+@@ -795,18 +773,15 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
+       else
+               irq = irq_alloc_desc_at(gsi, -1);
+-      if (!xen_irq_init(irq)) {
++      info = xen_irq_init(irq);
++      if (!info)
+               xen_irq_free_desc(irq);
+-              irq = -1;
+-      }
+-      return irq;
++      return info;
+ }
+-static void xen_free_irq(unsigned irq)
++static void xen_free_irq(struct irq_info *info)
+ {
+-      struct irq_info *info = info_for_irq(irq);
+-
+       if (WARN_ON(!info))
+               return;
+@@ -897,7 +872,7 @@ static unsigned int __startup_pirq(unsigned int irq)
+               goto err;
+       info->evtchn = evtchn;
+-      bind_evtchn_to_cpu(evtchn, 0, false);
++      bind_evtchn_to_cpu(info, 0, false);
+       rc = xen_evtchn_port_setup(evtchn);
+       if (rc)
+@@ -963,10 +938,9 @@ int xen_irq_from_gsi(unsigned gsi)
+ }
+ EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
+-static void __unbind_from_irq(unsigned int irq)
++static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
+ {
+-      evtchn_port_t evtchn = evtchn_from_irq(irq);
+-      struct irq_info *info = info_for_irq(irq);
++      evtchn_port_t evtchn;
+       if (!info) {
+               xen_irq_free_desc(irq);
+@@ -979,6 +953,8 @@ static void __unbind_from_irq(unsigned int irq)
+                       return;
+       }
++      evtchn = info->evtchn;
++
+       if (VALID_EVTCHN(evtchn)) {
+               unsigned int cpu = info->cpu;
+               struct xenbus_device *dev;
+@@ -988,11 +964,11 @@ static void __unbind_from_irq(unsigned int irq)
+               switch (info->type) {
+               case IRQT_VIRQ:
+-                      per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
++                      per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1;
+                       break;
+               case IRQT_IPI:
+-                      per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
+-                      per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(irq)] = 0;
++                      per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1;
++                      per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0;
+                       break;
+               case IRQT_EVTCHN:
+                       dev = info->u.interdomain;
+@@ -1006,7 +982,7 @@ static void __unbind_from_irq(unsigned int irq)
+               xen_irq_info_cleanup(info);
+       }
+-      xen_free_irq(irq);
++      xen_free_irq(info);
+ }
+ /*
+@@ -1022,24 +998,24 @@ static void __unbind_from_irq(unsigned int irq)
+ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
+                            unsigned pirq, int shareable, char *name)
+ {
+-      int irq;
++      struct irq_info *info;
+       struct physdev_irq irq_op;
+       int ret;
+       mutex_lock(&irq_mapping_update_lock);
+-      irq = xen_irq_from_gsi(gsi);
+-      if (irq != -1) {
++      ret = xen_irq_from_gsi(gsi);
++      if (ret != -1) {
+               pr_info("%s: returning irq %d for gsi %u\n",
+-                      __func__, irq, gsi);
++                      __func__, ret, gsi);
+               goto out;
+       }
+-      irq = xen_allocate_irq_gsi(gsi);
+-      if (irq < 0)
++      info = xen_allocate_irq_gsi(gsi);
++      if (!info)
+               goto out;
+-      irq_op.irq = irq;
++      irq_op.irq = info->irq;
+       irq_op.vector = 0;
+       /* Only the privileged domain can do this. For non-priv, the pcifront
+@@ -1047,20 +1023,19 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
+        * this in the priv domain. */
+       if (xen_initial_domain() &&
+           HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+-              xen_free_irq(irq);
+-              irq = -ENOSPC;
++              xen_free_irq(info);
++              ret = -ENOSPC;
+               goto out;
+       }
+-      ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
++      ret = xen_irq_info_pirq_setup(info, 0, pirq, gsi, DOMID_SELF,
+                              shareable ? PIRQ_SHAREABLE : 0);
+       if (ret < 0) {
+-              __unbind_from_irq(irq);
+-              irq = ret;
++              __unbind_from_irq(info, info->irq);
+               goto out;
+       }
+-      pirq_query_unmask(irq);
++      pirq_query_unmask(info->irq);
+       /* We try to use the handler with the appropriate semantic for the
+        * type of interrupt: if the interrupt is an edge triggered
+        * interrupt we use handle_edge_irq.
+@@ -1077,16 +1052,18 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
+        * is the right choice either way.
+        */
+       if (shareable)
+-              irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
++              irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
+                               handle_fasteoi_irq, name);
+       else
+-              irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
++              irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
+                               handle_edge_irq, name);
++      ret = info->irq;
++
+ out:
+       mutex_unlock(&irq_mapping_update_lock);
+-      return irq;
++      return ret;
+ }
+ #ifdef CONFIG_PCI_MSI
+@@ -1108,6 +1085,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+                            int pirq, int nvec, const char *name, domid_t domid)
+ {
+       int i, irq, ret;
++      struct irq_info *info;
+       mutex_lock(&irq_mapping_update_lock);
+@@ -1116,12 +1094,13 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+               goto out;
+       for (i = 0; i < nvec; i++) {
+-              if (!xen_irq_init(irq + i))
++              info = xen_irq_init(irq + i);
++              if (!info)
+                       goto error_irq;
+               irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
+-              ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
++              ret = xen_irq_info_pirq_setup(info, 0, pirq + i, 0, domid,
+                                             i == 0 ? 0 : PIRQ_MSI_GROUP);
+               if (ret < 0)
+                       goto error_irq;
+@@ -1133,9 +1112,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ out:
+       mutex_unlock(&irq_mapping_update_lock);
+       return irq;
++
+ error_irq:
+-      while (nvec--)
+-              __unbind_from_irq(irq + nvec);
++      while (nvec--) {
++              info = info_for_irq(irq + nvec);
++              __unbind_from_irq(info, irq + nvec);
++      }
+       mutex_unlock(&irq_mapping_update_lock);
+       return ret;
+ }
+@@ -1171,7 +1153,7 @@ int xen_destroy_irq(int irq)
+               }
+       }
+-      xen_free_irq(irq);
++      xen_free_irq(info);
+ out:
+       mutex_unlock(&irq_mapping_update_lock);
+@@ -1210,8 +1192,7 @@ EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
+ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+                                  struct xenbus_device *dev)
+ {
+-      int irq;
+-      int ret;
++      int ret = -ENOMEM;
+       struct irq_info *info;
+       if (evtchn >= xen_evtchn_max_channels())
+@@ -1222,17 +1203,16 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+       info = evtchn_to_info(evtchn);
+       if (!info) {
+-              irq = xen_allocate_irq_dynamic();
+-              if (irq < 0)
++              info = xen_allocate_irq_dynamic();
++              if (!info)
+                       goto out;
+-              irq_set_chip_and_handler_name(irq, chip,
++              irq_set_chip_and_handler_name(info->irq, chip,
+                                             handle_edge_irq, "event");
+-              ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
++              ret = xen_irq_info_evtchn_setup(info, evtchn, dev);
+               if (ret < 0) {
+-                      __unbind_from_irq(irq);
+-                      irq = ret;
++                      __unbind_from_irq(info, info->irq);
+                       goto out;
+               }
+               /*
+@@ -1242,17 +1222,17 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+                * affinity setting is not invoked on them so nothing would
+                * bind the channel.
+                */
+-              bind_evtchn_to_cpu(evtchn, 0, false);
+-      } else {
+-              if (!WARN_ON(info->type != IRQT_EVTCHN))
+-                      info->refcnt++;
+-              irq = info->irq;
++              bind_evtchn_to_cpu(info, 0, false);
++      } else if (!WARN_ON(info->type != IRQT_EVTCHN)) {
++              info->refcnt++;
+       }
++      ret = info->irq;
++
+ out:
+       mutex_unlock(&irq_mapping_update_lock);
+-      return irq;
++      return ret;
+ }
+ int bind_evtchn_to_irq(evtchn_port_t evtchn)
+@@ -1271,18 +1251,19 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ {
+       struct evtchn_bind_ipi bind_ipi;
+       evtchn_port_t evtchn;
+-      int ret, irq;
++      struct irq_info *info;
++      int ret;
+       mutex_lock(&irq_mapping_update_lock);
+-      irq = per_cpu(ipi_to_irq, cpu)[ipi];
++      ret = per_cpu(ipi_to_irq, cpu)[ipi];
+-      if (irq == -1) {
+-              irq = xen_allocate_irq_dynamic();
+-              if (irq < 0)
++      if (ret == -1) {
++              info = xen_allocate_irq_dynamic();
++              if (!info)
+                       goto out;
+-              irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
++              irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
+                                             handle_percpu_irq, "ipi");
+               bind_ipi.vcpu = xen_vcpu_nr(cpu);
+@@ -1291,25 +1272,25 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+                       BUG();
+               evtchn = bind_ipi.port;
+-              ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
++              ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
+               if (ret < 0) {
+-                      __unbind_from_irq(irq);
+-                      irq = ret;
++                      __unbind_from_irq(info, info->irq);
+                       goto out;
+               }
+               /*
+                * Force the affinity mask to the target CPU so proc shows
+                * the correct target.
+                */
+-              bind_evtchn_to_cpu(evtchn, cpu, true);
++              bind_evtchn_to_cpu(info, cpu, true);
++              ret = info->irq;
+       } else {
+-              struct irq_info *info = info_for_irq(irq);
++              info = info_for_irq(ret);
+               WARN_ON(info == NULL || info->type != IRQT_IPI);
+       }
+  out:
+       mutex_unlock(&irq_mapping_update_lock);
+-      return irq;
++      return ret;
+ }
+ static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
+@@ -1377,22 +1358,23 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
+ {
+       struct evtchn_bind_virq bind_virq;
+       evtchn_port_t evtchn = 0;
+-      int irq, ret;
++      struct irq_info *info;
++      int ret;
+       mutex_lock(&irq_mapping_update_lock);
+-      irq = per_cpu(virq_to_irq, cpu)[virq];
++      ret = per_cpu(virq_to_irq, cpu)[virq];
+-      if (irq == -1) {
+-              irq = xen_allocate_irq_dynamic();
+-              if (irq < 0)
++      if (ret == -1) {
++              info = xen_allocate_irq_dynamic();
++              if (!info)
+                       goto out;
+               if (percpu)
+-                      irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
++                      irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
+                                                     handle_percpu_irq, "virq");
+               else
+-                      irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
++                      irq_set_chip_and_handler_name(info->irq, &xen_dynamic_chip,
+                                                     handle_edge_irq, "virq");
+               bind_virq.virq = virq;
+@@ -1407,10 +1389,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
+                       BUG_ON(ret < 0);
+               }
+-              ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
++              ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);
+               if (ret < 0) {
+-                      __unbind_from_irq(irq);
+-                      irq = ret;
++                      __unbind_from_irq(info, info->irq);
+                       goto out;
+               }
+@@ -1418,22 +1399,26 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
+                * Force the affinity mask for percpu interrupts so proc
+                * shows the correct target.
+                */
+-              bind_evtchn_to_cpu(evtchn, cpu, percpu);
++              bind_evtchn_to_cpu(info, cpu, percpu);
++              ret = info->irq;
+       } else {
+-              struct irq_info *info = info_for_irq(irq);
++              info = info_for_irq(ret);
+               WARN_ON(info == NULL || info->type != IRQT_VIRQ);
+       }
+ out:
+       mutex_unlock(&irq_mapping_update_lock);
+-      return irq;
++      return ret;
+ }
+ static void unbind_from_irq(unsigned int irq)
+ {
++      struct irq_info *info;
++
+       mutex_lock(&irq_mapping_update_lock);
+-      __unbind_from_irq(irq);
++      info = info_for_irq(irq);
++      __unbind_from_irq(info, irq);
+       mutex_unlock(&irq_mapping_update_lock);
+ }
+@@ -1767,11 +1752,11 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
+       BUG_ON(info->type == IRQT_UNBOUND);
+       info->irq = irq;
+-      (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
++      (void)xen_irq_info_evtchn_setup(info, evtchn, NULL);
+       mutex_unlock(&irq_mapping_update_lock);
+-      bind_evtchn_to_cpu(evtchn, info->cpu, false);
++      bind_evtchn_to_cpu(info, info->cpu, false);
+       /* Unmask the event channel. */
+       enable_irq(irq);
+@@ -1805,7 +1790,7 @@ static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
+        * it, but don't do the xenlinux-level rebind in that case.
+        */
+       if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
+-              bind_evtchn_to_cpu(evtchn, tcpu, false);
++              bind_evtchn_to_cpu(info, tcpu, false);
+       do_unmask(info, EVT_MASK_REASON_TEMPORARY);
+@@ -1956,7 +1941,7 @@ static void restore_pirqs(void)
+               if (rc) {
+                       pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
+                               gsi, irq, pirq, rc);
+-                      xen_free_irq(irq);
++                      xen_free_irq(info);
+                       continue;
+               }
+@@ -1970,13 +1955,15 @@ static void restore_cpu_virqs(unsigned int cpu)
+ {
+       struct evtchn_bind_virq bind_virq;
+       evtchn_port_t evtchn;
++      struct irq_info *info;
+       int virq, irq;
+       for (virq = 0; virq < NR_VIRQS; virq++) {
+               if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
+                       continue;
++              info = info_for_irq(irq);
+-              BUG_ON(virq_from_irq(irq) != virq);
++              BUG_ON(virq_from_irq(info) != virq);
+               /* Get a new binding from Xen. */
+               bind_virq.virq = virq;
+@@ -1987,9 +1974,9 @@ static void restore_cpu_virqs(unsigned int cpu)
+               evtchn = bind_virq.port;
+               /* Record the new mapping. */
+-              (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
++              xen_irq_info_virq_setup(info, cpu, evtchn, virq);
+               /* The affinity mask is still valid */
+-              bind_evtchn_to_cpu(evtchn, cpu, false);
++              bind_evtchn_to_cpu(info, cpu, false);
+       }
+ }
+@@ -1997,13 +1984,15 @@ static void restore_cpu_ipis(unsigned int cpu)
+ {
+       struct evtchn_bind_ipi bind_ipi;
+       evtchn_port_t evtchn;
++      struct irq_info *info;
+       int ipi, irq;
+       for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
+               if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
+                       continue;
++              info = info_for_irq(irq);
+-              BUG_ON(ipi_from_irq(irq) != ipi);
++              BUG_ON(ipi_from_irq(info) != ipi);
+               /* Get a new binding from Xen. */
+               bind_ipi.vcpu = xen_vcpu_nr(cpu);
+@@ -2013,9 +2002,9 @@ static void restore_cpu_ipis(unsigned int cpu)
+               evtchn = bind_ipi.port;
+               /* Record the new mapping. */
+-              (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
++              xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
+               /* The affinity mask is still valid */
+-              bind_evtchn_to_cpu(evtchn, cpu, false);
++              bind_evtchn_to_cpu(info, cpu, false);
+       }
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/xen-events-reduce-externally-visible-helper-function.patch b/queue-6.6/xen-events-reduce-externally-visible-helper-function.patch
new file mode 100644 (file)
index 0000000..a8f650f
--- /dev/null
@@ -0,0 +1,129 @@
+From 66c0b706368143fac2552a9d254d6ab6073a7243 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Sep 2023 08:58:05 +0200
+Subject: xen/events: reduce externally visible helper functions
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 686464514fbebb6c8de4415238319e414c3500a4 ]
+
+get_evtchn_to_irq() has only one external user while irq_from_evtchn()
+provides the same functionality and is exported for a wider user base.
+Modify the only external user of get_evtchn_to_irq() to use
+irq_from_evtchn() instead and make get_evtchn_to_irq() static.
+
+evtchn_from_irq() and irq_from_virq() have a single external user and
+can easily be combined into a new helper irq_evtchn_from_virq(),
+allowing irq_from_virq() to be dropped and evtchn_from_irq() to be
+made static.
+
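+The remaining external user then needs a single call instead of two
+(condensed from the events_2l.c hunk below):
+
+  /* old */ irq = irq_from_virq(cpu, VIRQ_TIMER);
+  /*        evtchn = evtchn_from_irq(irq);     */
+  /* new */ irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn);
+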
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_2l.c       |  8 ++++----
+ drivers/xen/events/events_base.c     | 13 +++++++++----
+ drivers/xen/events/events_internal.h |  1 -
+ include/xen/events.h                 |  4 ++--
+ 4 files changed, 15 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
+index b8f2f971c2f0f..e3585330cf98b 100644
+--- a/drivers/xen/events/events_2l.c
++++ b/drivers/xen/events/events_2l.c
+@@ -171,11 +171,11 @@ static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
+       int i;
+       struct shared_info *s = HYPERVISOR_shared_info;
+       struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
++      evtchn_port_t evtchn;
+       /* Timer interrupt has highest priority. */
+-      irq = irq_from_virq(cpu, VIRQ_TIMER);
++      irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn);
+       if (irq != -1) {
+-              evtchn_port_t evtchn = evtchn_from_irq(irq);
+               word_idx = evtchn / BITS_PER_LONG;
+               bit_idx = evtchn % BITS_PER_LONG;
+               if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx))
+@@ -328,9 +328,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+       for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
+               if (sync_test_bit(i, BM(sh->evtchn_pending))) {
+                       int word_idx = i / BITS_PER_EVTCHN_WORD;
+-                      printk("  %d: event %d -> irq %d%s%s%s\n",
++                      printk("  %d: event %d -> irq %u%s%s%s\n",
+                              cpu_from_evtchn(i), i,
+-                             get_evtchn_to_irq(i),
++                             irq_from_evtchn(i),
+                              sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
+                              ? "" : " l2-clear",
+                              !sync_test_bit(i, BM(sh->evtchn_mask))
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index cd33a418344a8..57dfb512cdc5d 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -248,7 +248,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
+       return 0;
+ }
+-int get_evtchn_to_irq(evtchn_port_t evtchn)
++static int get_evtchn_to_irq(evtchn_port_t evtchn)
+ {
+       if (evtchn >= xen_evtchn_max_channels())
+               return -1;
+@@ -415,7 +415,7 @@ static void xen_irq_info_cleanup(struct irq_info *info)
+ /*
+  * Accessors for packed IRQ information.
+  */
+-evtchn_port_t evtchn_from_irq(unsigned irq)
++static evtchn_port_t evtchn_from_irq(unsigned int irq)
+ {
+       const struct irq_info *info = NULL;
+@@ -433,9 +433,14 @@ unsigned int irq_from_evtchn(evtchn_port_t evtchn)
+ }
+ EXPORT_SYMBOL_GPL(irq_from_evtchn);
+-int irq_from_virq(unsigned int cpu, unsigned int virq)
++int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
++                       evtchn_port_t *evtchn)
+ {
+-      return per_cpu(virq_to_irq, cpu)[virq];
++      int irq = per_cpu(virq_to_irq, cpu)[virq];
++
++      *evtchn = evtchn_from_irq(irq);
++
++      return irq;
+ }
+ static enum ipi_vector ipi_from_irq(unsigned irq)
+diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
+index 4d3398eff9cdf..19ae31695edcf 100644
+--- a/drivers/xen/events/events_internal.h
++++ b/drivers/xen/events/events_internal.h
+@@ -33,7 +33,6 @@ struct evtchn_ops {
+ extern const struct evtchn_ops *evtchn_ops;
+-int get_evtchn_to_irq(evtchn_port_t evtchn);
+ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
+ unsigned int cpu_from_evtchn(evtchn_port_t evtchn);
+diff --git a/include/xen/events.h b/include/xen/events.h
+index 23932b0673dc7..7488cd51fbf4f 100644
+--- a/include/xen/events.h
++++ b/include/xen/events.h
+@@ -101,8 +101,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout);
+ /* Determine the IRQ which is bound to an event channel */
+ unsigned int irq_from_evtchn(evtchn_port_t evtchn);
+-int irq_from_virq(unsigned int cpu, unsigned int virq);
+-evtchn_port_t evtchn_from_irq(unsigned irq);
++int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
++                       evtchn_port_t *evtchn);
+ int xen_set_callback_via(uint64_t via);
+ int xen_evtchn_do_upcall(void);
+-- 
+2.43.0
+
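The hunks above fold the old two-step lookup (irq_from_virq() followed by
evtchn_from_irq()) into the single exported helper irq_evtchn_from_virq(),
letting evtchn_from_irq() become static to events_base.c. A minimal caller
sketch against the new interface from include/xen/events.h; the wrapper
function, the pr_debug() reporting and the use of smp_processor_id() are
illustrative only, not part of the patch:

    #include <linux/printk.h>
    #include <linux/smp.h>
    #include <xen/events.h>
    #include <xen/interface/xen.h>   /* VIRQ_TIMER */

    /* Look up both the IRQ and the backing event channel bound to a
     * per-CPU virtual IRQ in one call, instead of irq_from_virq()
     * followed by the now-static evtchn_from_irq().
     */
    static void report_timer_binding(void)
    {
            evtchn_port_t evtchn;
            int irq = irq_evtchn_from_virq(smp_processor_id(), VIRQ_TIMER,
                                           &evtchn);

            if (irq != -1)
                    pr_debug("VIRQ_TIMER: irq %d, event channel %u\n",
                             irq, evtchn);
    }

Returning the event channel through a pointer keeps the error convention on
the IRQ return value alone, which matches the single `if (irq != -1)` check
in the events_2l.c hunk above.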
diff --git a/queue-6.6/xen-events-remove-some-simple-helpers-from-events_ba.patch b/queue-6.6/xen-events-remove-some-simple-helpers-from-events_ba.patch
new file mode 100644 (file)
index 0000000..eca6152
--- /dev/null
@@ -0,0 +1,263 @@
+From 2cd0afe1dddd41c5b851eb85ab30a6d800c8fe04 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Sep 2023 08:24:46 +0200
+Subject: xen/events: remove some simple helpers from events_base.c
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 3bdb0ac350fe5e6301562143e4573971dd01ae0b ]
+
+The helper functions type_from_irq() and cpu_from_irq() are just
+one-line functions used only internally.
+
+Open code them where needed. At the same time, modify and rename
+get_evtchn_to_irq() to return a struct irq_info pointer instead of
+the IRQ number.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_base.c | 97 +++++++++++++-------------------
+ 1 file changed, 38 insertions(+), 59 deletions(-)
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index 57dfb512cdc5d..d3d7501628381 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -248,15 +248,6 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
+       return 0;
+ }
+-static int get_evtchn_to_irq(evtchn_port_t evtchn)
+-{
+-      if (evtchn >= xen_evtchn_max_channels())
+-              return -1;
+-      if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
+-              return -1;
+-      return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
+-}
+-
+ /* Get info for IRQ */
+ static struct irq_info *info_for_irq(unsigned irq)
+ {
+@@ -274,6 +265,19 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info)
+               irq_set_chip_data(irq, info);
+ }
++static struct irq_info *evtchn_to_info(evtchn_port_t evtchn)
++{
++      int irq;
++
++      if (evtchn >= xen_evtchn_max_channels())
++              return NULL;
++      if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
++              return NULL;
++      irq = READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
++
++      return (irq < 0) ? NULL : info_for_irq(irq);
++}
++
+ /* Per CPU channel accounting */
+ static void channels_on_cpu_dec(struct irq_info *info)
+ {
+@@ -429,7 +433,9 @@ static evtchn_port_t evtchn_from_irq(unsigned int irq)
+ unsigned int irq_from_evtchn(evtchn_port_t evtchn)
+ {
+-      return get_evtchn_to_irq(evtchn);
++      struct irq_info *info = evtchn_to_info(evtchn);
++
++      return info ? info->irq : -1;
+ }
+ EXPORT_SYMBOL_GPL(irq_from_evtchn);
+@@ -473,25 +479,11 @@ static unsigned pirq_from_irq(unsigned irq)
+       return info->u.pirq.pirq;
+ }
+-static enum xen_irq_type type_from_irq(unsigned irq)
+-{
+-      return info_for_irq(irq)->type;
+-}
+-
+-static unsigned cpu_from_irq(unsigned irq)
+-{
+-      return info_for_irq(irq)->cpu;
+-}
+-
+ unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
+ {
+-      int irq = get_evtchn_to_irq(evtchn);
+-      unsigned ret = 0;
+-
+-      if (irq != -1)
+-              ret = cpu_from_irq(irq);
++      struct irq_info *info = evtchn_to_info(evtchn);
+-      return ret;
++      return info ? info->cpu : 0;
+ }
+ static void do_mask(struct irq_info *info, u8 reason)
+@@ -540,13 +532,12 @@ static bool pirq_needs_eoi_flag(unsigned irq)
+ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+                              bool force_affinity)
+ {
+-      int irq = get_evtchn_to_irq(evtchn);
+-      struct irq_info *info = info_for_irq(irq);
++      struct irq_info *info = evtchn_to_info(evtchn);
+-      BUG_ON(irq == -1);
++      BUG_ON(info == NULL);
+       if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+-              struct irq_data *data = irq_get_irq_data(irq);
++              struct irq_data *data = irq_get_irq_data(info->irq);
+               irq_data_update_affinity(data, cpumask_of(cpu));
+               irq_data_update_effective_affinity(data, cpumask_of(cpu));
+@@ -979,13 +970,13 @@ static void __unbind_from_irq(unsigned int irq)
+       }
+       if (VALID_EVTCHN(evtchn)) {
+-              unsigned int cpu = cpu_from_irq(irq);
++              unsigned int cpu = info->cpu;
+               struct xenbus_device *dev;
+               if (!info->is_static)
+                       xen_evtchn_close(evtchn);
+-              switch (type_from_irq(irq)) {
++              switch (info->type) {
+               case IRQT_VIRQ:
+                       per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
+                       break;
+@@ -1208,15 +1199,16 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+ {
+       int irq;
+       int ret;
++      struct irq_info *info;
+       if (evtchn >= xen_evtchn_max_channels())
+               return -ENOMEM;
+       mutex_lock(&irq_mapping_update_lock);
+-      irq = get_evtchn_to_irq(evtchn);
++      info = evtchn_to_info(evtchn);
+-      if (irq == -1) {
++      if (!info) {
+               irq = xen_allocate_irq_dynamic();
+               if (irq < 0)
+                       goto out;
+@@ -1239,9 +1231,9 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+                */
+               bind_evtchn_to_cpu(evtchn, 0, false);
+       } else {
+-              struct irq_info *info = info_for_irq(irq);
+-              if (!WARN_ON(!info || info->type != IRQT_EVTCHN))
++              if (!WARN_ON(info->type != IRQT_EVTCHN))
+                       info->refcnt++;
++              irq = info->irq;
+       }
+ out:
+@@ -1579,13 +1571,7 @@ EXPORT_SYMBOL_GPL(xen_set_irq_priority);
+ int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
+ {
+-      int irq = get_evtchn_to_irq(evtchn);
+-      struct irq_info *info;
+-
+-      if (irq == -1)
+-              return -ENOENT;
+-
+-      info = info_for_irq(irq);
++      struct irq_info *info = evtchn_to_info(evtchn);
+       if (!info)
+               return -ENOENT;
+@@ -1601,7 +1587,6 @@ EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
+ int evtchn_get(evtchn_port_t evtchn)
+ {
+-      int irq;
+       struct irq_info *info;
+       int err = -ENOENT;
+@@ -1610,11 +1595,7 @@ int evtchn_get(evtchn_port_t evtchn)
+       mutex_lock(&irq_mapping_update_lock);
+-      irq = get_evtchn_to_irq(evtchn);
+-      if (irq == -1)
+-              goto done;
+-
+-      info = info_for_irq(irq);
++      info = evtchn_to_info(evtchn);
+       if (!info)
+               goto done;
+@@ -1634,10 +1615,11 @@ EXPORT_SYMBOL_GPL(evtchn_get);
+ void evtchn_put(evtchn_port_t evtchn)
+ {
+-      int irq = get_evtchn_to_irq(evtchn);
+-      if (WARN_ON(irq == -1))
++      struct irq_info *info = evtchn_to_info(evtchn);
++
++      if (WARN_ON(!info))
+               return;
+-      unbind_from_irq(irq);
++      unbind_from_irq(info->irq);
+ }
+ EXPORT_SYMBOL_GPL(evtchn_put);
+@@ -1667,12 +1649,10 @@ struct evtchn_loop_ctrl {
+ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+ {
+-      int irq;
+-      struct irq_info *info;
++      struct irq_info *info = evtchn_to_info(port);
+       struct xenbus_device *dev;
+-      irq = get_evtchn_to_irq(port);
+-      if (irq == -1)
++      if (!info)
+               return;
+       /*
+@@ -1697,7 +1677,6 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+               }
+       }
+-      info = info_for_irq(irq);
+       if (xchg_acquire(&info->is_active, 1))
+               return;
+@@ -1711,7 +1690,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+               info->eoi_time = get_jiffies_64() + event_eoi_delay;
+       }
+-      generic_handle_irq(irq);
++      generic_handle_irq(info->irq);
+ }
+ int xen_evtchn_do_upcall(void)
+@@ -1769,7 +1748,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
+       mutex_lock(&irq_mapping_update_lock);
+       /* After resume the irq<->evtchn mappings are all cleared out */
+-      BUG_ON(get_evtchn_to_irq(evtchn) != -1);
++      BUG_ON(evtchn_to_info(evtchn));
+       /* Expect irq to have been bound before,
+          so there should be a proper type */
+       BUG_ON(info->type == IRQT_UNBOUND);
+-- 
+2.43.0
+
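The refactoring above replaces the repeated get_evtchn_to_irq() plus
info_for_irq() double lookup with one NULL-checkable call. A minimal sketch
of the resulting pattern; evtchn_to_info() is static to events_base.c, so
this shape only applies within that file, and the wrapper function name is
hypothetical:

    /* Post-patch lookup pattern: one call, one NULL check, and the IRQ,
     * CPU and type are all read off the returned irq_info.
     */
    static void example_dispatch(evtchn_port_t port)
    {
            struct irq_info *info = evtchn_to_info(port);

            if (!info)      /* port not bound to any IRQ */
                    return;

            generic_handle_irq(info->irq);
    }

This is the same shape the patch gives handle_irq_for_port(), evtchn_get()
and evtchn_put(), removing the window where an IRQ number could be looked
up but its irq_info fetched separately.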
diff --git a/queue-6.6/xen-evtchn-allow-shared-registration-of-irq-handers.patch b/queue-6.6/xen-evtchn-allow-shared-registration-of-irq-handers.patch
new file mode 100644 (file)
index 0000000..67bcaf0
--- /dev/null
@@ -0,0 +1,65 @@
+From 861d0c8cff0a50b41cac7a7ce7ed17b274d2e406 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Oct 2023 12:41:26 +0530
+Subject: xen: evtchn: Allow shared registration of IRQ handlers
+
+From: Viresh Kumar <viresh.kumar@linaro.org>
+
+[ Upstream commit 9e90e58c11b74c2bddac4b2702cf79d36b981278 ]
+
+Currently, events can be handled either in the kernel or in userspace,
+but not both.
+
+In order to support fast delivery of interrupts from the guest to the
+backend, we need to handle the "queue notify" part of the Virtio
+protocol in the kernel and the rest in userspace.
+
+Update the interrupt handler registration flag to IRQF_SHARED for event
+channels, which allows multiple entities to bind their interrupt
+handlers to the same event channel port.
+
+Also increment the reference count of irq_info when multiple entities
+try to bind an event channel to the irqchip, so that unbinding happens
+only after all users are gone.
+
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/99b1edfd3147c6b5d22a5139dab5861e767dc34a.1697439990.git.viresh.kumar@linaro.org
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/events/events_base.c | 3 ++-
+ drivers/xen/evtchn.c             | 2 +-
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index c50419638ac0a..cd33a418344a8 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -1235,7 +1235,8 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+               bind_evtchn_to_cpu(evtchn, 0, false);
+       } else {
+               struct irq_info *info = info_for_irq(irq);
+-              WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
++              if (!WARN_ON(!info || info->type != IRQT_EVTCHN))
++                      info->refcnt++;
+       }
+ out:
+diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
+index 9139a7364df53..59717628ca42b 100644
+--- a/drivers/xen/evtchn.c
++++ b/drivers/xen/evtchn.c
+@@ -397,7 +397,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port,
+       if (rc < 0)
+               goto err;
+-      rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
++      rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, IRQF_SHARED,
+                                              u->name, evtchn);
+       if (rc < 0)
+               goto err;
+-- 
+2.43.0
+
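With IRQF_SHARED in place, a second kernel-side consumer can attach to a
port that evtchn.c has already bound for userspace. A sketch of such a
consumer: bind_evtchn_to_irqhandler_lateeoi() and xen_irq_lateeoi() are
existing kernel interfaces, while the handler, device name and cookie are
hypothetical, and the exact split of work between the two handlers is an
assumption:

    #include <linux/interrupt.h>
    #include <xen/events.h>

    /* Hypothetical kernel fast path sharing an event channel with the
     * userspace evtchn handler.  Each bind bumps irq_info->refcnt, so
     * the port is only unbound after both consumers have released it.
     */
    static irqreturn_t queue_notify_handler(int irq, void *dev_id)
    {
            /* ... handle the virtio queue notify in the kernel ... */
            xen_irq_lateeoi(irq, 0);  /* lateeoi contract: re-arm delivery */
            return IRQ_HANDLED;
    }

    static int bind_queue_notify(evtchn_port_t port, void *cookie)
    {
            /* cookie must be non-NULL and unique: shared handlers are
             * told apart by their dev_id at free_irq() time.
             */
            return bind_evtchn_to_irqhandler_lateeoi(port,
                                                     queue_notify_handler,
                                                     IRQF_SHARED,
                                                     "queue-notify",
                                                     cookie);
    }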