]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.4
author Sasha Levin <sashal@kernel.org>
Sun, 24 Jul 2022 03:30:01 +0000 (23:30 -0400)
committer Sasha Levin <sashal@kernel.org>
Sun, 24 Jul 2022 03:30:01 +0000 (23:30 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
45 files changed:
queue-5.4/be2net-fix-buffer-overflow-in-be_get_module_eeprom.patch [new file with mode: 0644]
queue-5.4/gpio-pca953x-only-use-single-read-write-for-no-ai-mo.patch [new file with mode: 0644]
queue-5.4/i2c-cadence-change-large-transfer-count-reset-logic-.patch [new file with mode: 0644]
queue-5.4/i40e-fix-erroneous-adapter-reinitialization-during-r.patch [new file with mode: 0644]
queue-5.4/iavf-fix-handling-of-dummy-receive-descriptors.patch [new file with mode: 0644]
queue-5.4/igc-reinstate-igc_removed-logic-and-implement-it-pro.patch [new file with mode: 0644]
queue-5.4/igmp-fix-a-data-race-around-sysctl_igmp_max_membersh.patch [new file with mode: 0644]
queue-5.4/igmp-fix-data-races-around-sysctl_igmp_llm_reports.patch [new file with mode: 0644]
queue-5.4/ip-fix-a-data-race-around-sysctl_fwmark_reflect.patch [new file with mode: 0644]
queue-5.4/ip-fix-data-races-around-sysctl_ip_fwd_use_pmtu.patch [new file with mode: 0644]
queue-5.4/ip-fix-data-races-around-sysctl_ip_no_pmtu_disc.patch [new file with mode: 0644]
queue-5.4/ip-fix-data-races-around-sysctl_ip_nonlocal_bind.patch [new file with mode: 0644]
queue-5.4/ipv4-fix-a-data-race-around-sysctl_fib_multipath_use.patch [new file with mode: 0644]
queue-5.4/ixgbe-add-locking-to-prevent-panic-when-setting-srio.patch [new file with mode: 0644]
queue-5.4/net-stmmac-fix-dma-queue-left-shift-overflow-issue.patch [new file with mode: 0644]
queue-5.4/net-tls-fix-race-in-tls-device-down-flow.patch [new file with mode: 0644]
queue-5.4/perf-core-fix-data-race-between-perf_event_set_outpu.patch [new file with mode: 0644]
queue-5.4/pinctrl-ralink-check-for-null-return-of-devm_kcalloc.patch [new file with mode: 0644]
queue-5.4/power-reset-arm-versatile-fix-refcount-leak-in-versa.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/tcp-dccp-fix-a-data-race-around-sysctl_tcp_fwmark_ac.patch [new file with mode: 0644]
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_early_retrans.patch [new file with mode: 0644]
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_mtu_probe_floo.patch [new file with mode: 0644]
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_notsent_lowat.patch [new file with mode: 0644]
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_probe_interval.patch [new file with mode: 0644]
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_probe_threshol.patch [new file with mode: 0644]
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_retrans_collap.patch [new file with mode: 0644]
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_rfc1337.patch [new file with mode: 0644]
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_stdurg.patch [new file with mode: 0644]
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_thin_linear_ti.patch [new file with mode: 0644]
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_tw_reuse.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-some-timeout-sysctl-knobs.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl-knobs-related-to-sy.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl_max_syn_backlog.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl_tcp_base_mss.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl_tcp_fastopen.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl_tcp_max_reordering.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl_tcp_min_snd_mss.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl_tcp_mtu_probing.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl_tcp_recovery.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl_tcp_reordering.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl_tcp_slow_start_afte.patch [new file with mode: 0644]
queue-5.4/tcp-fix-data-races-around-sysctl_tcp_syncookies.patch [new file with mode: 0644]
queue-5.4/udp-fix-a-data-race-around-sysctl_udp_l3mdev_accept.patch [new file with mode: 0644]
queue-5.4/xfrm-xfrm_policy-fix-a-possible-double-xfrm_pols_put.patch [new file with mode: 0644]

diff --git a/queue-5.4/be2net-fix-buffer-overflow-in-be_get_module_eeprom.patch b/queue-5.4/be2net-fix-buffer-overflow-in-be_get_module_eeprom.patch
new file mode 100644 (file)
index 0000000..fbe1a70
--- /dev/null
@@ -0,0 +1,144 @@
+From 544e2c83df0a6c7ebc792fca868bd51f0f95af23 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Jul 2022 11:51:34 +0300
+Subject: be2net: Fix buffer overflow in be_get_module_eeprom
+
+From: Hristo Venev <hristo@venev.name>
+
+[ Upstream commit d7241f679a59cfe27f92cb5c6272cb429fb1f7ec ]
+
+be_cmd_read_port_transceiver_data assumes that it is given a buffer that
+is at least PAGE_DATA_LEN long, or twice that if the module supports SFF
+8472. However, this is not always the case.
+
+Fix this by passing the desired offset and length to
+be_cmd_read_port_transceiver_data so that we only copy the bytes once.
+
+Fixes: e36edd9d26cf ("be2net: add ethtool "-m" option support")
+Signed-off-by: Hristo Venev <hristo@venev.name>
+Link: https://lore.kernel.org/r/20220716085134.6095-1-hristo@venev.name
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/emulex/benet/be_cmds.c   | 10 +++---
+ drivers/net/ethernet/emulex/benet/be_cmds.h   |  2 +-
+ .../net/ethernet/emulex/benet/be_ethtool.c    | 31 ++++++++++++-------
+ 3 files changed, 25 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
+index 649c5c429bd7..1288b5e3d220 100644
+--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
++++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
+@@ -2287,7 +2287,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state)
+ /* Uses sync mcc */
+ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
+-                                    u8 page_num, u8 *data)
++                                    u8 page_num, u32 off, u32 len, u8 *data)
+ {
+       struct be_dma_mem cmd;
+       struct be_mcc_wrb *wrb;
+@@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
+       req->port = cpu_to_le32(adapter->hba_port_num);
+       req->page_num = cpu_to_le32(page_num);
+       status = be_mcc_notify_wait(adapter);
+-      if (!status) {
++      if (!status && len > 0) {
+               struct be_cmd_resp_port_type *resp = cmd.va;
+-              memcpy(data, resp->page_data, PAGE_DATA_LEN);
++              memcpy(data, resp->page_data + off, len);
+       }
+ err:
+       mutex_unlock(&adapter->mcc_lock);
+@@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter)
+       int status;
+       status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
+-                                                 page_data);
++                                                 0, PAGE_DATA_LEN, page_data);
+       if (!status) {
+               switch (adapter->phy.interface_type) {
+               case PHY_TYPE_QSFP:
+@@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter)
+       int status;
+       status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
+-                                                 page_data);
++                                                 0, PAGE_DATA_LEN, page_data);
+       if (!status) {
+               strlcpy(adapter->phy.vendor_name, page_data +
+                       SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1);
+diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
+index c30d6d6f0f3a..9e17d6a7ab8c 100644
+--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
++++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
+@@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon,
+ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num,
+                           u32 *state);
+ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
+-                                    u8 page_num, u8 *data);
++                                    u8 page_num, u32 off, u32 len, u8 *data);
+ int be_cmd_query_cable_type(struct be_adapter *adapter);
+ int be_cmd_query_sfp_info(struct be_adapter *adapter);
+ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
+diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
+index 5bb5abf99588..7cc1f41971c5 100644
+--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
++++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
+@@ -1339,7 +1339,7 @@ static int be_get_module_info(struct net_device *netdev,
+               return -EOPNOTSUPP;
+       status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
+-                                                 page_data);
++                                                 0, PAGE_DATA_LEN, page_data);
+       if (!status) {
+               if (!page_data[SFP_PLUS_SFF_8472_COMP]) {
+                       modinfo->type = ETH_MODULE_SFF_8079;
+@@ -1357,25 +1357,32 @@ static int be_get_module_eeprom(struct net_device *netdev,
+ {
+       struct be_adapter *adapter = netdev_priv(netdev);
+       int status;
++      u32 begin, end;
+       if (!check_privilege(adapter, MAX_PRIVILEGES))
+               return -EOPNOTSUPP;
+-      status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
+-                                                 data);
+-      if (status)
+-              goto err;
++      begin = eeprom->offset;
++      end = eeprom->offset + eeprom->len;
++
++      if (begin < PAGE_DATA_LEN) {
++              status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin,
++                                                         min_t(u32, end, PAGE_DATA_LEN) - begin,
++                                                         data);
++              if (status)
++                      goto err;
++
++              data += PAGE_DATA_LEN - begin;
++              begin = PAGE_DATA_LEN;
++      }
+-      if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) {
+-              status = be_cmd_read_port_transceiver_data(adapter,
+-                                                         TR_PAGE_A2,
+-                                                         data +
+-                                                         PAGE_DATA_LEN);
++      if (end > PAGE_DATA_LEN) {
++              status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2,
++                                                         begin - PAGE_DATA_LEN,
++                                                         end - begin, data);
+               if (status)
+                       goto err;
+       }
+-      if (eeprom->offset)
+-              memcpy(data, data + eeprom->offset, eeprom->len);
+ err:
+       return be_cmd_status(status);
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.4/gpio-pca953x-only-use-single-read-write-for-no-ai-mo.patch b/queue-5.4/gpio-pca953x-only-use-single-read-write-for-no-ai-mo.patch
new file mode 100644 (file)
index 0000000..475f854
--- /dev/null
@@ -0,0 +1,48 @@
+From 34840c562bf4534e85001642578db421f47bf7c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 16:31:41 +0800
+Subject: gpio: pca953x: only use single read/write for No AI mode
+
+From: Haibo Chen <haibo.chen@nxp.com>
+
+[ Upstream commit db8edaa09d7461ec08672a92a2eef63d5882bb79 ]
+
+For devices that use NO AI mode (no support for auto address increment),
+only use single read/write when configuring the regmap.
+
+We met an issue with the PCA9557PW on the i.MX8QXP/DXL EVK board. This
+device does not support AI mode, but when doing the regmap sync, regmap
+will sync 3 bytes of data to register 1; logically this means writing the
+first byte to register 1, the second to register 2, and the third to
+register 3. Because this device does not support AI mode, all three
+bytes are instead written into register 1, one after another. The result
+is that register 1 always ends up equal to the last byte written (here
+the third), and nothing happens to register 2 and register 3. This is
+not what we expect.
+
+Fixes: 49427232764d ("gpio: pca953x: Perform basic regmap conversion")
+Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-pca953x.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
+index 54da66d02b0e..317f54f19477 100644
+--- a/drivers/gpio/gpio-pca953x.c
++++ b/drivers/gpio/gpio-pca953x.c
+@@ -379,6 +379,9 @@ static const struct regmap_config pca953x_i2c_regmap = {
+       .reg_bits = 8,
+       .val_bits = 8,
++      .use_single_read = true,
++      .use_single_write = true,
++
+       .readable_reg = pca953x_readable_register,
+       .writeable_reg = pca953x_writeable_register,
+       .volatile_reg = pca953x_volatile_register,
+-- 
+2.35.1
+
diff --git a/queue-5.4/i2c-cadence-change-large-transfer-count-reset-logic-.patch b/queue-5.4/i2c-cadence-change-large-transfer-count-reset-logic-.patch
new file mode 100644 (file)
index 0000000..ac5bba9
--- /dev/null
@@ -0,0 +1,111 @@
+From 73c36688bcd85e0772536a3c2b31bddd2d813af2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jun 2022 17:29:19 -0600
+Subject: i2c: cadence: Change large transfer count reset logic to be
+ unconditional
+
+From: Robert Hancock <robert.hancock@calian.com>
+
+[ Upstream commit 4ca8ca873d454635c20d508261bfc0081af75cf8 ]
+
+Problems were observed on the Xilinx ZynqMP platform with large I2C reads.
+When a read of 277 bytes was performed, the controller NAKed the transfer
+after only 252 bytes were transferred and returned an ENXIO error on the
+transfer.
+
+There is some code in cdns_i2c_master_isr to handle this case by resetting
+the transfer count in the controller before it reaches 0, to allow larger
+transfers to work, but it was conditional on the CDNS_I2C_BROKEN_HOLD_BIT
+quirk being set on the controller, and ZynqMP uses the r1p14 version of
+the core where this quirk is not being set. The requirement to do this to
+support larger reads seems like an inherently required workaround due to
+the core only having an 8-bit transfer size register, so it does not
+appear that this should be conditional on the broken HOLD bit quirk which
+is used elsewhere in the driver.
+
+Remove the dependency on the CDNS_I2C_BROKEN_HOLD_BIT for this transfer
+size reset logic to fix this problem.
+
+Fixes: 63cab195bf49 ("i2c: removed work arounds in i2c driver for Zynq Ultrascale+ MPSoC")
+Signed-off-by: Robert Hancock <robert.hancock@calian.com>
+Reviewed-by: Shubhrajyoti Datta <Shubhrajyoti.datta@amd.com>
+Acked-by: Michal Simek <michal.simek@amd.com>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/i2c/busses/i2c-cadence.c | 30 +++++-------------------------
+ 1 file changed, 5 insertions(+), 25 deletions(-)
+
+diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
+index 3a1bdc75275f..8750e444f449 100644
+--- a/drivers/i2c/busses/i2c-cadence.c
++++ b/drivers/i2c/busses/i2c-cadence.c
+@@ -198,9 +198,9 @@ static inline bool cdns_is_holdquirk(struct cdns_i2c *id, bool hold_wrkaround)
+  */
+ static irqreturn_t cdns_i2c_isr(int irq, void *ptr)
+ {
+-      unsigned int isr_status, avail_bytes, updatetx;
++      unsigned int isr_status, avail_bytes;
+       unsigned int bytes_to_send;
+-      bool hold_quirk;
++      bool updatetx;
+       struct cdns_i2c *id = ptr;
+       /* Signal completion only after everything is updated */
+       int done_flag = 0;
+@@ -219,11 +219,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr)
+        * Check if transfer size register needs to be updated again for a
+        * large data receive operation.
+        */
+-      updatetx = 0;
+-      if (id->recv_count > id->curr_recv_count)
+-              updatetx = 1;
+-
+-      hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx;
++      updatetx = id->recv_count > id->curr_recv_count;
+       /* When receiving, handle data interrupt and completion interrupt */
+       if (id->p_recv_buf &&
+@@ -246,7 +242,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr)
+                       id->recv_count--;
+                       id->curr_recv_count--;
+-                      if (cdns_is_holdquirk(id, hold_quirk))
++                      if (cdns_is_holdquirk(id, updatetx))
+                               break;
+               }
+@@ -257,7 +253,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr)
+                * maintain transfer size non-zero while performing a large
+                * receive operation.
+                */
+-              if (cdns_is_holdquirk(id, hold_quirk)) {
++              if (cdns_is_holdquirk(id, updatetx)) {
+                       /* wait while fifo is full */
+                       while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) !=
+                              (id->curr_recv_count - CDNS_I2C_FIFO_DEPTH))
+@@ -279,22 +275,6 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr)
+                                                 CDNS_I2C_XFER_SIZE_OFFSET);
+                               id->curr_recv_count = id->recv_count;
+                       }
+-              } else if (id->recv_count && !hold_quirk &&
+-                                              !id->curr_recv_count) {
+-
+-                      /* Set the slave address in address register*/
+-                      cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK,
+-                                              CDNS_I2C_ADDR_OFFSET);
+-
+-                      if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) {
+-                              cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE,
+-                                              CDNS_I2C_XFER_SIZE_OFFSET);
+-                              id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE;
+-                      } else {
+-                              cdns_i2c_writereg(id->recv_count,
+-                                              CDNS_I2C_XFER_SIZE_OFFSET);
+-                              id->curr_recv_count = id->recv_count;
+-                      }
+               }
+               /* Clear hold (if not repeated start) and signal completion */
+-- 
+2.35.1
+
diff --git a/queue-5.4/i40e-fix-erroneous-adapter-reinitialization-during-r.patch b/queue-5.4/i40e-fix-erroneous-adapter-reinitialization-during-r.patch
new file mode 100644 (file)
index 0000000..0436c4e
--- /dev/null
@@ -0,0 +1,83 @@
+From 84c7587b9d83b4c31d1c3494bf2367bb67275905 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 14:45:41 -0700
+Subject: i40e: Fix erroneous adapter reinitialization during recovery process
+
+From: Dawid Lukwinski <dawid.lukwinski@intel.com>
+
+[ Upstream commit f838a63369818faadec4ad1736cfbd20ab5da00e ]
+
+Fix an issue when driver incorrectly detects state
+of recovery process and erroneously reinitializes interrupts,
+which results in a kernel error and call trace message.
+
+The issue was caused by a combination of two factors:
+1. Assuming the EMP reset issued after completing
+firmware recovery means the whole recovery process is complete.
+2. Erroneous reinitialization of interrupt vector after detecting
+the above mentioned EMP reset.
+
+Fixes (1) by changing how recovery state change is detected
+and (2) by adjusting the conditional expression to ensure using proper
+interrupt reinitialization method, depending on the situation.
+
+Fixes: 4ff0ee1af016 ("i40e: Introduce recovery mode support")
+Signed-off-by: Dawid Lukwinski <dawid.lukwinski@intel.com>
+Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
+Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Link: https://lore.kernel.org/r/20220715214542.2968762-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index 05442bbc218c..0610d344fdbf 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -10068,7 +10068,7 @@ static int i40e_reset(struct i40e_pf *pf)
+  **/
+ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+ {
+-      int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state);
++      const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
+       struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+       struct i40e_hw *hw = &pf->hw;
+       i40e_status ret;
+@@ -10076,13 +10076,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+       int v;
+       if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+-          i40e_check_recovery_mode(pf)) {
++          is_recovery_mode_reported)
+               i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
+-      }
+       if (test_bit(__I40E_DOWN, pf->state) &&
+-          !test_bit(__I40E_RECOVERY_MODE, pf->state) &&
+-          !old_recovery_mode_bit)
++          !test_bit(__I40E_RECOVERY_MODE, pf->state))
+               goto clear_recovery;
+       dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
+@@ -10109,13 +10107,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+        * accordingly with regard to resources initialization
+        * and deinitialization
+        */
+-      if (test_bit(__I40E_RECOVERY_MODE, pf->state) ||
+-          old_recovery_mode_bit) {
++      if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+               if (i40e_get_capabilities(pf,
+                                         i40e_aqc_opc_list_func_capabilities))
+                       goto end_unlock;
+-              if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
++              if (is_recovery_mode_reported) {
+                       /* we're staying in recovery mode so we'll reinitialize
+                        * misc vector here
+                        */
+-- 
+2.35.1
+
diff --git a/queue-5.4/iavf-fix-handling-of-dummy-receive-descriptors.patch b/queue-5.4/iavf-fix-handling-of-dummy-receive-descriptors.patch
new file mode 100644 (file)
index 0000000..cbee614
--- /dev/null
@@ -0,0 +1,48 @@
+From 12112d102f6b5169b8fdb8a8a7910140ffe492ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Jun 2022 17:33:01 -0700
+Subject: iavf: Fix handling of dummy receive descriptors
+
+From: Przemyslaw Patynowski <przemyslawx.patynowski@intel.com>
+
+[ Upstream commit a9f49e0060301a9bfebeca76739158d0cf91cdf6 ]
+
+Fix memory leak caused by not handling dummy receive descriptor properly.
+iavf_get_rx_buffer now sets the rx_buffer return value for dummy receive
+descriptors. Without this patch, when the hardware writes a dummy
+descriptor, iavf would not free the page allocated for the previous receive
+buffer. This is an unlikely event but can still happen.
+
+[Jesse: massaged commit message]
+
+Fixes: efa14c398582 ("iavf: allow null RX descriptors")
+Signed-off-by: Przemyslaw Patynowski <przemyslawx.patynowski@intel.com>
+Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
+Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_txrx.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+index 7a30d5d5ef53..c6905d1b6182 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+@@ -1263,11 +1263,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring,
+ {
+       struct iavf_rx_buffer *rx_buffer;
+-      if (!size)
+-              return NULL;
+-
+       rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
+       prefetchw(rx_buffer->page);
++      if (!size)
++              return rx_buffer;
+       /* we are reusing so sync this buffer for CPU use */
+       dma_sync_single_range_for_cpu(rx_ring->dev,
+-- 
+2.35.1
+
diff --git a/queue-5.4/igc-reinstate-igc_removed-logic-and-implement-it-pro.patch b/queue-5.4/igc-reinstate-igc_removed-logic-and-implement-it-pro.patch
new file mode 100644 (file)
index 0000000..381b75c
--- /dev/null
@@ -0,0 +1,94 @@
+From dedb2fb7439af608a1fdc2c7078ea535967910d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Jun 2022 18:58:11 +0300
+Subject: igc: Reinstate IGC_REMOVED logic and implement it properly
+
+From: Lennert Buytenhek <buytenh@wantstofly.org>
+
+[ Upstream commit 7c1ddcee5311f3315096217881d2dbe47cc683f9 ]
+
+The initially merged version of the igc driver code (via commit
+146740f9abc4, "igc: Add support for PF") contained the following
+IGC_REMOVED checks in the igc_rd32/wr32() MMIO accessors:
+
+       u32 igc_rd32(struct igc_hw *hw, u32 reg)
+       {
+               u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
+               u32 value = 0;
+
+               if (IGC_REMOVED(hw_addr))
+                       return ~value;
+
+               value = readl(&hw_addr[reg]);
+
+               /* reads should not return all F's */
+               if (!(~value) && (!reg || !(~readl(hw_addr))))
+                       hw->hw_addr = NULL;
+
+               return value;
+       }
+
+And:
+
+       #define wr32(reg, val) \
+       do { \
+               u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
+               if (!IGC_REMOVED(hw_addr)) \
+                       writel((val), &hw_addr[(reg)]); \
+       } while (0)
+
+E.g. igb has similar checks in its MMIO accessors, and has a similar
+macro E1000_REMOVED, which is implemented as follows:
+
+       #define E1000_REMOVED(h) unlikely(!(h))
+
+These checks serve to detect and take note of an 0xffffffff MMIO read
+return from the device, which can be caused by a PCIe link flap or some
+other kind of PCI bus error, and to avoid performing MMIO reads and
+writes from that point onwards.
+
+However, the IGC_REMOVED macro was not originally implemented:
+
+       #ifndef IGC_REMOVED
+       #define IGC_REMOVED(a) (0)
+       #endif /* IGC_REMOVED */
+
+This led to the IGC_REMOVED logic being removed entirely in a
+subsequent commit (commit 3c215fb18e70, "igc: remove IGC_REMOVED
+function"), with the rationale that such checks matter only for
+virtualization and that igc does not support virtualization -- but a
+PCIe device can become detached even without virtualization being in
+use, and without proper checks, a PCIe bus error affecting an igc
+adapter will lead to various NULL pointer dereferences, as the first
+access after the error will set hw->hw_addr to NULL, and subsequent
+accesses will blindly dereference this now-NULL pointer.
+
+This patch reinstates the IGC_REMOVED checks in igc_rd32/wr32(), and
+implements IGC_REMOVED the way it is done for igb, by checking for the
+unlikely() case of hw_addr being NULL.  This change prevents the oopses
+seen when a PCIe link flap occurs on an igc adapter.
+
+Fixes: 146740f9abc4 ("igc: Add support for PF")
+Signed-off-by: Lennert Buytenhek <buytenh@arista.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Acked-by: Sasha Neftin <sasha.neftin@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_regs.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
+index 50d7c04dccf5..7bc7d7618fe1 100644
+--- a/drivers/net/ethernet/intel/igc/igc_regs.h
++++ b/drivers/net/ethernet/intel/igc/igc_regs.h
+@@ -236,4 +236,6 @@ do { \
+ #define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2)))
++#define IGC_REMOVED(h) unlikely(!(h))
++
+ #endif
+-- 
+2.35.1
+
diff --git a/queue-5.4/igmp-fix-a-data-race-around-sysctl_igmp_max_membersh.patch b/queue-5.4/igmp-fix-a-data-race-around-sysctl_igmp_max_membersh.patch
new file mode 100644 (file)
index 0000000..e9a9c8f
--- /dev/null
@@ -0,0 +1,36 @@
+From 6bc15d5cc142e8e6e12144a0f49fe0199352e5b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:42 -0700
+Subject: igmp: Fix a data-race around sysctl_igmp_max_memberships.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 6305d821e3b9b5379d348528e5b5faf316383bc2 ]
+
+While reading sysctl_igmp_max_memberships, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/igmp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+index 7cd444d75c3d..660b41040c77 100644
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -2199,7 +2199,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
+               count++;
+       }
+       err = -ENOBUFS;
+-      if (count >= net->ipv4.sysctl_igmp_max_memberships)
++      if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships))
+               goto done;
+       iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
+       if (!iml)
+-- 
+2.35.1
+
diff --git a/queue-5.4/igmp-fix-data-races-around-sysctl_igmp_llm_reports.patch b/queue-5.4/igmp-fix-data-races-around-sysctl_igmp_llm_reports.patch
new file mode 100644 (file)
index 0000000..df5a639
--- /dev/null
@@ -0,0 +1,110 @@
+From 38f9325914b683f2aae7cc0e3b99287c49c0c9dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:41 -0700
+Subject: igmp: Fix data-races around sysctl_igmp_llm_reports.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit f6da2267e71106474fbc0943dc24928b9cb79119 ]
+
+While reading sysctl_igmp_llm_reports, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+This test can be packed into a helper, so such changes will be in the
+follow-up series after net is merged into net-next.
+
+  if (ipv4_is_local_multicast(pmc->multiaddr) &&
+      !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+
+Fixes: df2cf4a78e48 ("IGMP: Inhibit reports for local multicast groups")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/igmp.c | 21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+index cac2fdd08df0..7cd444d75c3d 100644
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -469,7 +469,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
+       if (pmc->multiaddr == IGMP_ALL_HOSTS)
+               return skb;
+-      if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
++      if (ipv4_is_local_multicast(pmc->multiaddr) &&
++          !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+               return skb;
+       mtu = READ_ONCE(dev->mtu);
+@@ -595,7 +596,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
+                       if (pmc->multiaddr == IGMP_ALL_HOSTS)
+                               continue;
+                       if (ipv4_is_local_multicast(pmc->multiaddr) &&
+-                           !net->ipv4.sysctl_igmp_llm_reports)
++                          !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+                               continue;
+                       spin_lock_bh(&pmc->lock);
+                       if (pmc->sfcount[MCAST_EXCLUDE])
+@@ -738,7 +739,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
+       if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
+               return igmpv3_send_report(in_dev, pmc);
+-      if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
++      if (ipv4_is_local_multicast(group) &&
++          !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+               return 0;
+       if (type == IGMP_HOST_LEAVE_MESSAGE)
+@@ -922,7 +924,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
+       if (group == IGMP_ALL_HOSTS)
+               return false;
+-      if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
++      if (ipv4_is_local_multicast(group) &&
++          !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+               return false;
+       rcu_read_lock();
+@@ -1047,7 +1050,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
+               if (im->multiaddr == IGMP_ALL_HOSTS)
+                       continue;
+               if (ipv4_is_local_multicast(im->multiaddr) &&
+-                  !net->ipv4.sysctl_igmp_llm_reports)
++                  !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+                       continue;
+               spin_lock_bh(&im->lock);
+               if (im->tm_running)
+@@ -1298,7 +1301,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp)
+ #ifdef CONFIG_IP_MULTICAST
+       if (im->multiaddr == IGMP_ALL_HOSTS)
+               return;
+-      if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
++      if (ipv4_is_local_multicast(im->multiaddr) &&
++          !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+               return;
+       reporter = im->reporter;
+@@ -1340,7 +1344,8 @@ static void igmp_group_added(struct ip_mc_list *im)
+ #ifdef CONFIG_IP_MULTICAST
+       if (im->multiaddr == IGMP_ALL_HOSTS)
+               return;
+-      if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
++      if (ipv4_is_local_multicast(im->multiaddr) &&
++          !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+               return;
+       if (in_dev->dead)
+@@ -1644,7 +1649,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
+               if (im->multiaddr == IGMP_ALL_HOSTS)
+                       continue;
+               if (ipv4_is_local_multicast(im->multiaddr) &&
+-                  !net->ipv4.sysctl_igmp_llm_reports)
++                  !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+                       continue;
+               /* a failover is happening and switches
+-- 
+2.35.1
+
diff --git a/queue-5.4/ip-fix-a-data-race-around-sysctl_fwmark_reflect.patch b/queue-5.4/ip-fix-a-data-race-around-sysctl_fwmark_reflect.patch
new file mode 100644 (file)
index 0000000..ae37350
--- /dev/null
@@ -0,0 +1,36 @@
+From 822b3882b6e475719b462482b3568f37ff3afd6a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:51:57 -0700
+Subject: ip: Fix a data-race around sysctl_fwmark_reflect.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 85d0b4dbd74b95cc492b1f4e34497d3f894f5d9a ]
+
+While reading sysctl_fwmark_reflect, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: e110861f8609 ("net: add a sysctl to reflect the fwmark on replies")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 21fc0a29a8d4..db841ab388c0 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -381,7 +381,7 @@ void ipfrag_init(void);
+ void ip_static_sysctl_init(void);
+ #define IP4_REPLY_MARK(net, mark) \
+-      ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
++      (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0)
+ static inline bool ip_is_fragment(const struct iphdr *iph)
+ {
+-- 
+2.35.1
+
diff --git a/queue-5.4/ip-fix-data-races-around-sysctl_ip_fwd_use_pmtu.patch b/queue-5.4/ip-fix-data-races-around-sysctl_ip_fwd_use_pmtu.patch
new file mode 100644 (file)
index 0000000..81c5631
--- /dev/null
@@ -0,0 +1,50 @@
+From bd481496e95021b14a71a4ab62f8b9c85b37191f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:51:53 -0700
+Subject: ip: Fix data-races around sysctl_ip_fwd_use_pmtu.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 60c158dc7b1f0558f6cadd5b50d0386da0000d50 ]
+
+While reading sysctl_ip_fwd_use_pmtu, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: f87c10a8aa1e ("ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip.h | 2 +-
+ net/ipv4/route.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 3f3ea86b2173..21fc0a29a8d4 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -442,7 +442,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
+       struct net *net = dev_net(dst->dev);
+       unsigned int mtu;
+-      if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
++      if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
+           ip_mtu_locked(dst) ||
+           !forwarding)
+               return dst_mtu(dst);
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 9280e5087159..7004e379c325 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1423,7 +1423,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
+       struct fib_info *fi = res->fi;
+       u32 mtu = 0;
+-      if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
++      if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
+           fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
+               mtu = fi->fib_mtu;
+-- 
+2.35.1
+
diff --git a/queue-5.4/ip-fix-data-races-around-sysctl_ip_no_pmtu_disc.patch b/queue-5.4/ip-fix-data-races-around-sysctl_ip_no_pmtu_disc.patch
new file mode 100644 (file)
index 0000000..aa90647
--- /dev/null
@@ -0,0 +1,78 @@
+From 8e6c2829d839c32a5460bf14a6861d567b1ed262 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:51:52 -0700
+Subject: ip: Fix data-races around sysctl_ip_no_pmtu_disc.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 0968d2a441bf6afb551fd99e60fa65ed67068963 ]
+
+While reading sysctl_ip_no_pmtu_disc, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/af_inet.c    | 2 +-
+ net/ipv4/icmp.c       | 2 +-
+ net/ipv6/af_inet6.c   | 2 +-
+ net/xfrm/xfrm_state.c | 2 +-
+ 4 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
+index 9ab73fcc7411..06153386776d 100644
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -337,7 +337,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
+                       inet->hdrincl = 1;
+       }
+-      if (net->ipv4.sysctl_ip_no_pmtu_disc)
++      if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
+               inet->pmtudisc = IP_PMTUDISC_DONT;
+       else
+               inet->pmtudisc = IP_PMTUDISC_WANT;
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index 9bc01411be4c..b44f51e404ae 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -886,7 +886,7 @@ static bool icmp_unreach(struct sk_buff *skb)
+                        * values please see
+                        * Documentation/networking/ip-sysctl.txt
+                        */
+-                      switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
++                      switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
+                       default:
+                               net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
+                                                   &iph->daddr);
+diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
+index 942da168f18f..56f396ecc26b 100644
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -222,7 +222,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
+       inet->mc_list   = NULL;
+       inet->rcv_tos   = 0;
+-      if (net->ipv4.sysctl_ip_no_pmtu_disc)
++      if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
+               inet->pmtudisc = IP_PMTUDISC_DONT;
+       else
+               inet->pmtudisc = IP_PMTUDISC_WANT;
+diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
+index 268bba29bb60..bee1a8143d75 100644
+--- a/net/xfrm/xfrm_state.c
++++ b/net/xfrm/xfrm_state.c
+@@ -2488,7 +2488,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
+       int err;
+       if (family == AF_INET &&
+-          xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
++          READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
+               x->props.flags |= XFRM_STATE_NOPMTUDISC;
+       err = -EPROTONOSUPPORT;
+-- 
+2.35.1
+
diff --git a/queue-5.4/ip-fix-data-races-around-sysctl_ip_nonlocal_bind.patch b/queue-5.4/ip-fix-data-races-around-sysctl_ip_nonlocal_bind.patch
new file mode 100644 (file)
index 0000000..cb34be1
--- /dev/null
@@ -0,0 +1,50 @@
+From af083ed073cda77765d24bb92cc424bf5d08f0e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:51:55 -0700
+Subject: ip: Fix data-races around sysctl_ip_nonlocal_bind.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 289d3b21fb0bfc94c4e98f10635bba1824e5f83c ]
+
+While reading sysctl_ip_nonlocal_bind, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/inet_sock.h | 2 +-
+ net/sctp/protocol.c     | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
+index 34c4436fd18f..40f92f5a3047 100644
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -375,7 +375,7 @@ static inline bool inet_get_convert_csum(struct sock *sk)
+ static inline bool inet_can_nonlocal_bind(struct net *net,
+                                         struct inet_sock *inet)
+ {
+-      return net->ipv4.sysctl_ip_nonlocal_bind ||
++      return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
+               inet->freebind || inet->transparent;
+ }
+diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
+index bb370a7948f4..363a64c12414 100644
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
+       if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
+          ret != RTN_LOCAL &&
+          !sp->inet.freebind &&
+-         !net->ipv4.sysctl_ip_nonlocal_bind)
++          !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
+               return 0;
+       if (ipv6_only_sock(sctp_opt2sk(sp)))
+-- 
+2.35.1
+
diff --git a/queue-5.4/ipv4-fix-a-data-race-around-sysctl_fib_multipath_use.patch b/queue-5.4/ipv4-fix-a-data-race-around-sysctl_fib_multipath_use.patch
new file mode 100644 (file)
index 0000000..e3744f5
--- /dev/null
@@ -0,0 +1,36 @@
+From 2cd17ce602019c536b9c5f3e593df0284e6b0eb9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:39 -0700
+Subject: ipv4: Fix a data-race around sysctl_fib_multipath_use_neigh.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 87507bcb4f5de16bb419e9509d874f4db6c0ad0f ]
+
+While reading sysctl_fib_multipath_use_neigh, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: a6db4494d218 ("net: ipv4: Consider failed nexthops in multipath routes")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fib_semantics.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+index 16fe03461563..28da0443f3e9 100644
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -2209,7 +2209,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
+       }
+       change_nexthops(fi) {
+-              if (net->ipv4.sysctl_fib_multipath_use_neigh) {
++              if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
+                       if (!fib_good_nh(nexthop_nh))
+                               continue;
+                       if (!first) {
+-- 
+2.35.1
+
diff --git a/queue-5.4/ixgbe-add-locking-to-prevent-panic-when-setting-srio.patch b/queue-5.4/ixgbe-add-locking-to-prevent-panic-when-setting-srio.patch
new file mode 100644 (file)
index 0000000..80b1789
--- /dev/null
@@ -0,0 +1,138 @@
+From 47205ec2dc0e738da87975a0f15647f21fb03287 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 14:44:56 -0700
+Subject: ixgbe: Add locking to prevent panic when setting sriov_numvfs to zero
+
+From: Piotr Skajewski <piotrx.skajewski@intel.com>
+
+[ Upstream commit 1e53834ce541d4fe271cdcca7703e50be0a44f8a ]
+
+It is possible to disable VFs while the PF driver is processing requests
+from the VF driver.  This can result in a panic.
+
+BUG: unable to handle kernel paging request at 000000000000106c
+PGD 0 P4D 0
+Oops: 0000 [#1] SMP NOPTI
+CPU: 8 PID: 0 Comm: swapper/8 Kdump: loaded Tainted: G I      --------- -
+Hardware name: Dell Inc. PowerEdge R740/06WXJT, BIOS 2.8.2 08/27/2020
+RIP: 0010:ixgbe_msg_task+0x4c8/0x1690 [ixgbe]
+Code: 00 00 48 8d 04 40 48 c1 e0 05 89 7c 24 24 89 fd 48 89 44 24 10 83 ff
+01 0f 84 b8 04 00 00 4c 8b 64 24 10 4d 03 a5 48 22 00 00 <41> 80 7c 24 4c
+00 0f 84 8a 03 00 00 0f b7 c7 83 f8 08 0f 84 8f 0a
+RSP: 0018:ffffb337869f8df8 EFLAGS: 00010002
+RAX: 0000000000001020 RBX: 0000000000000000 RCX: 000000000000002b
+RDX: 0000000000000002 RSI: 0000000000000008 RDI: 0000000000000006
+RBP: 0000000000000006 R08: 0000000000000002 R09: 0000000000029780
+R10: 00006957d8f42832 R11: 0000000000000000 R12: 0000000000001020
+R13: ffff8a00e8978ac0 R14: 000000000000002b R15: ffff8a00e8979c80
+FS:  0000000000000000(0000) GS:ffff8a07dfd00000(0000) knlGS:00000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000000000000106c CR3: 0000000063e10004 CR4: 00000000007726e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+PKRU: 55555554
+Call Trace:
+ <IRQ>
+ ? ttwu_do_wakeup+0x19/0x140
+ ? try_to_wake_up+0x1cd/0x550
+ ? ixgbevf_update_xcast_mode+0x71/0xc0 [ixgbevf]
+ ixgbe_msix_other+0x17e/0x310 [ixgbe]
+ __handle_irq_event_percpu+0x40/0x180
+ handle_irq_event_percpu+0x30/0x80
+ handle_irq_event+0x36/0x53
+ handle_edge_irq+0x82/0x190
+ handle_irq+0x1c/0x30
+ do_IRQ+0x49/0xd0
+ common_interrupt+0xf/0xf
+
+This can eventually be reproduced with the following script:
+
+while :
+do
+    echo 63 > /sys/class/net/<devname>/device/sriov_numvfs
+    sleep 1
+    echo 0 > /sys/class/net/<devname>/device/sriov_numvfs
+    sleep 1
+done
+
+Add a lock when disabling SR-IOV to block concurrent VF mailbox processing.
+
+Fixes: d773d1310625 ("ixgbe: Fix memory leak when SR-IOV VFs are direct assigned")
+Signed-off-by: Piotr Skajewski <piotrx.skajewski@intel.com>
+Tested-by: Marek Szlosek <marek.szlosek@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Link: https://lore.kernel.org/r/20220715214456.2968711-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ixgbe/ixgbe.h       | 1 +
+ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  | 3 +++
+ drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 6 ++++++
+ 3 files changed, 10 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+index 39e73ad60352..fa49ef2afde5 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+@@ -773,6 +773,7 @@ struct ixgbe_adapter {
+ #ifdef CONFIG_IXGBE_IPSEC
+       struct ixgbe_ipsec *ipsec;
+ #endif /* CONFIG_IXGBE_IPSEC */
++      spinlock_t vfs_lock;
+ };
+ static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter)
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+index 8a894e5d923f..f8aa1a0b89c5 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+@@ -6396,6 +6396,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
+       /* n-tuple support exists, always init our spinlock */
+       spin_lock_init(&adapter->fdir_perfect_lock);
++      /* init spinlock to avoid concurrency of VF resources */
++      spin_lock_init(&adapter->vfs_lock);
++
+ #ifdef CONFIG_IXGBE_DCB
+       ixgbe_init_dcb(adapter);
+ #endif
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+index cf5c2b9465eb..0e73e3b1af19 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+@@ -204,10 +204,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs)
+ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
+ {
+       unsigned int num_vfs = adapter->num_vfs, vf;
++      unsigned long flags;
+       int rss;
++      spin_lock_irqsave(&adapter->vfs_lock, flags);
+       /* set num VFs to 0 to prevent access to vfinfo */
+       adapter->num_vfs = 0;
++      spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+       /* put the reference to all of the vf devices */
+       for (vf = 0; vf < num_vfs; ++vf) {
+@@ -1305,8 +1308,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf)
+ void ixgbe_msg_task(struct ixgbe_adapter *adapter)
+ {
+       struct ixgbe_hw *hw = &adapter->hw;
++      unsigned long flags;
+       u32 vf;
++      spin_lock_irqsave(&adapter->vfs_lock, flags);
+       for (vf = 0; vf < adapter->num_vfs; vf++) {
+               /* process any reset requests */
+               if (!ixgbe_check_for_rst(hw, vf))
+@@ -1320,6 +1325,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter)
+               if (!ixgbe_check_for_ack(hw, vf))
+                       ixgbe_rcv_ack_from_vf(adapter, vf);
+       }
++      spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+ }
+ void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter)
+-- 
+2.35.1
+
diff --git a/queue-5.4/net-stmmac-fix-dma-queue-left-shift-overflow-issue.patch b/queue-5.4/net-stmmac-fix-dma-queue-left-shift-overflow-issue.patch
new file mode 100644 (file)
index 0000000..642b2d3
--- /dev/null
@@ -0,0 +1,82 @@
+From bee7317a3987c067ffd2641db09fb71cf163d5a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 15:47:01 +0800
+Subject: net: stmmac: fix dma queue left shift overflow issue
+
+From: Junxiao Chang <junxiao.chang@intel.com>
+
+[ Upstream commit 613b065ca32e90209024ec4a6bb5ca887ee70980 ]
+
+When the queue number is > 4, the left shift overflows the 32-bit
+integer variable. The mask calculation is wrong for MTL_RXQ_DMA_MAP1.
+
+If CONFIG_UBSAN is enabled, the kernel dumps the warning below:
+[   10.363842] ==================================================================
+[   10.363882] UBSAN: shift-out-of-bounds in /build/linux-intel-iotg-5.15-8e6Tf4/
+linux-intel-iotg-5.15-5.15.0/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:224:12
+[   10.363929] shift exponent 40 is too large for 32-bit type 'unsigned int'
+[   10.363953] CPU: 1 PID: 599 Comm: NetworkManager Not tainted 5.15.0-1003-intel-iotg
+[   10.363956] Hardware name: ADLINK Technology Inc. LEC-EL/LEC-EL, BIOS 0.15.11 12/22/2021
+[   10.363958] Call Trace:
+[   10.363960]  <TASK>
+[   10.363963]  dump_stack_lvl+0x4a/0x5f
+[   10.363971]  dump_stack+0x10/0x12
+[   10.363974]  ubsan_epilogue+0x9/0x45
+[   10.363976]  __ubsan_handle_shift_out_of_bounds.cold+0x61/0x10e
+[   10.363979]  ? wake_up_klogd+0x4a/0x50
+[   10.363983]  ? vprintk_emit+0x8f/0x240
+[   10.363986]  dwmac4_map_mtl_dma.cold+0x42/0x91 [stmmac]
+[   10.364001]  stmmac_mtl_configuration+0x1ce/0x7a0 [stmmac]
+[   10.364009]  ? dwmac410_dma_init_channel+0x70/0x70 [stmmac]
+[   10.364020]  stmmac_hw_setup.cold+0xf/0xb14 [stmmac]
+[   10.364030]  ? page_pool_alloc_pages+0x4d/0x70
+[   10.364034]  ? stmmac_clear_tx_descriptors+0x6e/0xe0 [stmmac]
+[   10.364042]  stmmac_open+0x39e/0x920 [stmmac]
+[   10.364050]  __dev_open+0xf0/0x1a0
+[   10.364054]  __dev_change_flags+0x188/0x1f0
+[   10.364057]  dev_change_flags+0x26/0x60
+[   10.364059]  do_setlink+0x908/0xc40
+[   10.364062]  ? do_setlink+0xb10/0xc40
+[   10.364064]  ? __nla_validate_parse+0x4c/0x1a0
+[   10.364068]  __rtnl_newlink+0x597/0xa10
+[   10.364072]  ? __nla_reserve+0x41/0x50
+[   10.364074]  ? __kmalloc_node_track_caller+0x1d0/0x4d0
+[   10.364079]  ? pskb_expand_head+0x75/0x310
+[   10.364082]  ? nla_reserve_64bit+0x21/0x40
+[   10.364086]  ? skb_free_head+0x65/0x80
+[   10.364089]  ? security_sock_rcv_skb+0x2c/0x50
+[   10.364094]  ? __cond_resched+0x19/0x30
+[   10.364097]  ? kmem_cache_alloc_trace+0x15a/0x420
+[   10.364100]  rtnl_newlink+0x49/0x70
+
+This change fixes MTL_RXQ_DMA_MAP1 mask issue and channel/queue
+mapping warning.
+
+Fixes: d43042f4da3e ("net: stmmac: mapping mtl rx to dma channel")
+BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=216195
+Reported-by: Cedric Wassenaar <cedric@bytespeed.nl>
+Signed-off-by: Junxiao Chang <junxiao.chang@intel.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+index 66e60c7e9850..c440b192ec71 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+@@ -215,6 +215,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan)
+       if (queue == 0 || queue == 4) {
+               value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK;
+               value |= MTL_RXQ_DMA_Q04MDMACH(chan);
++      } else if (queue > 4) {
++              value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4);
++              value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4);
+       } else {
+               value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue);
+               value |= MTL_RXQ_DMA_QXMDMACH(chan, queue);
+-- 
+2.35.1
+
diff --git a/queue-5.4/net-tls-fix-race-in-tls-device-down-flow.patch b/queue-5.4/net-tls-fix-race-in-tls-device-down-flow.patch
new file mode 100644 (file)
index 0000000..6fcabc2
--- /dev/null
@@ -0,0 +1,72 @@
+From f2a30f776d728eeb26118bf86ad121b8deba6d04 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 11:42:16 +0300
+Subject: net/tls: Fix race in TLS device down flow
+
+From: Tariq Toukan <tariqt@nvidia.com>
+
+[ Upstream commit f08d8c1bb97c48f24a82afaa2fd8c140f8d3da8b ]
+
+Socket destruction flow and tls_device_down function sync against each
+other using tls_device_lock and the context refcount, to guarantee the
+device resources are freed via tls_dev_del() by the end of
+tls_device_down.
+
+In the following unfortunate flow, this won't happen:
+- refcount is decreased to zero in tls_device_sk_destruct.
+- tls_device_down starts, skips the context as refcount is zero, going
+  all the way until it flushes the gc work, and returns without freeing
+  the device resources.
+- only then, tls_device_queue_ctx_destruction is called, queues the gc
+  work and frees the context's device resources.
+
+Solve it by decreasing the refcount in the socket's destruction flow
+under the tls_device_lock, for perfect synchronization.  This does not
+slow down the common likely destructor flow, in which both the refcount
+is decreased and the spinlock is acquired, anyway.
+
+Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure")
+Reviewed-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_device.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
+index abb93f7343c5..2c3cf47d730b 100644
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -94,13 +94,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
+       unsigned long flags;
+       spin_lock_irqsave(&tls_device_lock, flags);
++      if (unlikely(!refcount_dec_and_test(&ctx->refcount)))
++              goto unlock;
++
+       list_move_tail(&ctx->list, &tls_device_gc_list);
+       /* schedule_work inside the spinlock
+        * to make sure tls_device_down waits for that work.
+        */
+       schedule_work(&tls_device_gc_work);
+-
++unlock:
+       spin_unlock_irqrestore(&tls_device_lock, flags);
+ }
+@@ -191,8 +194,7 @@ static void tls_device_sk_destruct(struct sock *sk)
+               clean_acked_data_disable(inet_csk(sk));
+       }
+-      if (refcount_dec_and_test(&tls_ctx->refcount))
+-              tls_device_queue_ctx_destruction(tls_ctx);
++      tls_device_queue_ctx_destruction(tls_ctx);
+ }
+ void tls_device_free_resources_tx(struct sock *sk)
+-- 
+2.35.1
+
diff --git a/queue-5.4/perf-core-fix-data-race-between-perf_event_set_outpu.patch b/queue-5.4/perf-core-fix-data-race-between-perf_event_set_outpu.patch
new file mode 100644 (file)
index 0000000..84a2ad1
--- /dev/null
@@ -0,0 +1,167 @@
+From 59b387093f650aa5063ab8ea2a7ee98ec30ef767 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Jul 2022 15:07:26 +0200
+Subject: perf/core: Fix data race between perf_event_set_output() and
+ perf_mmap_close()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 68e3c69803dada336893640110cb87221bb01dcf ]
+
+Yang Jihong reported a race between perf_event_set_output() and
+perf_mmap_close():
+
+       CPU1                                    CPU2
+
+       perf_mmap_close(e2)
+         if (atomic_dec_and_test(&e2->rb->mmap_count)) // 1 - > 0
+           detach_rest = true
+
+                                               ioctl(e1, IOC_SET_OUTPUT, e2)
+                                                 perf_event_set_output(e1, e2)
+
+         ...
+         list_for_each_entry_rcu(e, &e2->rb->event_list, rb_entry)
+           ring_buffer_attach(e, NULL);
+           // e1 isn't yet added and
+           // therefore not detached
+
+                                                   ring_buffer_attach(e1, e2->rb)
+                                                     list_add_rcu(&e1->rb_entry,
+                                                                  &e2->rb->event_list)
+
+After this; e1 is attached to an unmapped rb and a subsequent
+perf_mmap() will loop forever more:
+
+       again:
+               mutex_lock(&e->mmap_mutex);
+               if (event->rb) {
+                       ...
+                       if (!atomic_inc_not_zero(&e->rb->mmap_count)) {
+                               ...
+                               mutex_unlock(&e->mmap_mutex);
+                               goto again;
+                       }
+               }
+
+The loop in perf_mmap_close() holds e2->mmap_mutex, while the attach
+in perf_event_set_output() holds e1->mmap_mutex. As such there is no
+serialization to avoid this race.
+
+Change perf_event_set_output() to take both e1->mmap_mutex and
+e2->mmap_mutex to alleviate that problem. Additionally, have the loop
+in perf_mmap() detach the rb directly, this avoids having to wait for
+the concurrent perf_mmap_close() to get around to doing it to make
+progress.
+
+Fixes: 9bb5d40cd93c ("perf: Fix mmap() accounting hole")
+Reported-by: Yang Jihong <yangjihong1@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Yang Jihong <yangjihong1@huawei.com>
+Link: https://lkml.kernel.org/r/YsQ3jm2GR38SW7uD@worktop.programming.kicks-ass.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/core.c | 45 ++++++++++++++++++++++++++++++--------------
+ 1 file changed, 31 insertions(+), 14 deletions(-)
+
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 8336dcb2bd43..0a54780e0942 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -5819,10 +5819,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+               if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
+                       /*
+-                       * Raced against perf_mmap_close() through
+-                       * perf_event_set_output(). Try again, hope for better
+-                       * luck.
++                       * Raced against perf_mmap_close(); remove the
++                       * event and try again.
+                        */
++                      ring_buffer_attach(event, NULL);
+                       mutex_unlock(&event->mmap_mutex);
+                       goto again;
+               }
+@@ -10763,14 +10763,25 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
+       goto out;
+ }
++static void mutex_lock_double(struct mutex *a, struct mutex *b)
++{
++      if (b < a)
++              swap(a, b);
++
++      mutex_lock(a);
++      mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
++}
++
+ static int
+ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
+ {
+       struct ring_buffer *rb = NULL;
+       int ret = -EINVAL;
+-      if (!output_event)
++      if (!output_event) {
++              mutex_lock(&event->mmap_mutex);
+               goto set;
++      }
+       /* don't allow circular references */
+       if (event == output_event)
+@@ -10808,8 +10819,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
+           event->pmu != output_event->pmu)
+               goto out;
++      /*
++       * Hold both mmap_mutex to serialize against perf_mmap_close().  Since
++       * output_event is already on rb->event_list, and the list iteration
++       * restarts after every removal, it is guaranteed this new event is
++       * observed *OR* if output_event is already removed, it's guaranteed we
++       * observe !rb->mmap_count.
++       */
++      mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex);
+ set:
+-      mutex_lock(&event->mmap_mutex);
+       /* Can't redirect output if we've got an active mmap() */
+       if (atomic_read(&event->mmap_count))
+               goto unlock;
+@@ -10819,6 +10837,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
+               rb = ring_buffer_get(output_event);
+               if (!rb)
+                       goto unlock;
++
++              /* did we race against perf_mmap_close() */
++              if (!atomic_read(&rb->mmap_count)) {
++                      ring_buffer_put(rb);
++                      goto unlock;
++              }
+       }
+       ring_buffer_attach(event, rb);
+@@ -10826,20 +10850,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
+       ret = 0;
+ unlock:
+       mutex_unlock(&event->mmap_mutex);
++      if (output_event)
++              mutex_unlock(&output_event->mmap_mutex);
+ out:
+       return ret;
+ }
+-static void mutex_lock_double(struct mutex *a, struct mutex *b)
+-{
+-      if (b < a)
+-              swap(a, b);
+-
+-      mutex_lock(a);
+-      mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+-}
+-
+ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
+ {
+       bool nmi_safe = false;
+-- 
+2.35.1
+
diff --git a/queue-5.4/pinctrl-ralink-check-for-null-return-of-devm_kcalloc.patch b/queue-5.4/pinctrl-ralink-check-for-null-return-of-devm_kcalloc.patch
new file mode 100644 (file)
index 0000000..ddadd5d
--- /dev/null
@@ -0,0 +1,43 @@
+From e6a79e28d5e1c9d726e84985da642fabeda283ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Jul 2022 23:49:22 +0800
+Subject: pinctrl: ralink: Check for null return of devm_kcalloc
+
+From: William Dean <williamsukatube@gmail.com>
+
+[ Upstream commit c3b821e8e406d5650e587b7ac624ac24e9b780a8 ]
+
+Because the allocation can fail, p->func[i]->pins might be a NULL
+pointer, which would cause a NULL pointer dereference in the loop that
+follows.
+Therefore, check the result and return -ENOMEM directly, without
+releasing the data manually on failure, because the comment of
+devm_kmalloc() says "Memory allocated with this function is
+automatically freed on driver detach.".
+
+Fixes: a86854d0c599b ("treewide: devm_kzalloc() -> devm_kcalloc()")
+Reported-by: Hacash Robot <hacashRobot@santino.com>
+Signed-off-by: William Dean <williamsukatube@gmail.com>
+Link: https://lore.kernel.org/r/20220710154922.2610876-1-williamsukatube@163.com
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c
+index 0ba4e4e070a9..7cfbdfb10e23 100644
+--- a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c
++++ b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c
+@@ -267,6 +267,8 @@ static int rt2880_pinmux_pins(struct rt2880_priv *p)
+                                               p->func[i]->pin_count,
+                                               sizeof(int),
+                                               GFP_KERNEL);
++              if (!p->func[i]->pins)
++                      return -ENOMEM;
+               for (j = 0; j < p->func[i]->pin_count; j++)
+                       p->func[i]->pins[j] = p->func[i]->pin_first + j;
+-- 
+2.35.1
+
diff --git a/queue-5.4/power-reset-arm-versatile-fix-refcount-leak-in-versa.patch b/queue-5.4/power-reset-arm-versatile-fix-refcount-leak-in-versa.patch
new file mode 100644 (file)
index 0000000..dd627b3
--- /dev/null
@@ -0,0 +1,38 @@
+From 3c0ca181f8d98f2ce60f528f5e8dc330cbbf5ffa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 23 May 2022 18:10:09 +0400
+Subject: power/reset: arm-versatile: Fix refcount leak in
+ versatile_reboot_probe
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit 80192eff64eee9b3bc0594a47381937b94b9d65a ]
+
+of_find_matching_node_and_match() returns a node pointer with refcount
+incremented, we should use of_node_put() on it when it is not needed anymore.
+Add missing of_node_put() to avoid refcount leak.
+
+Fixes: 0e545f57b708 ("power: reset: driver for the Versatile syscon reboot")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/power/reset/arm-versatile-reboot.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/power/reset/arm-versatile-reboot.c b/drivers/power/reset/arm-versatile-reboot.c
+index 08d0a07b58ef..c7624d7611a7 100644
+--- a/drivers/power/reset/arm-versatile-reboot.c
++++ b/drivers/power/reset/arm-versatile-reboot.c
+@@ -146,6 +146,7 @@ static int __init versatile_reboot_probe(void)
+       versatile_reboot_type = (enum versatile_reboot)reboot_id->data;
+       syscon_regmap = syscon_node_to_regmap(np);
++      of_node_put(np);
+       if (IS_ERR(syscon_regmap))
+               return PTR_ERR(syscon_regmap);
+-- 
+2.35.1
+
index f28e67a68af98a8dce12d0c52bfff17c1412420d..cd4b457cb9d13eef72fef832999baa2cd4049bb2 100644 (file)
@@ -8,3 +8,47 @@ pci-hv-fix-hv_arch_irq_unmask-for-multi-msi.patch
 pci-hv-reuse-existing-irte-allocation-in-compose_msi_msg.patch
 pci-hv-fix-interrupt-mapping-for-multi-msi.patch
 serial-mvebu-uart-correctly-report-configured-baudrate-value.patch
+xfrm-xfrm_policy-fix-a-possible-double-xfrm_pols_put.patch
+power-reset-arm-versatile-fix-refcount-leak-in-versa.patch
+pinctrl-ralink-check-for-null-return-of-devm_kcalloc.patch
+perf-core-fix-data-race-between-perf_event_set_outpu.patch
+igc-reinstate-igc_removed-logic-and-implement-it-pro.patch
+ip-fix-data-races-around-sysctl_ip_no_pmtu_disc.patch
+ip-fix-data-races-around-sysctl_ip_fwd_use_pmtu.patch
+ip-fix-data-races-around-sysctl_ip_nonlocal_bind.patch
+ip-fix-a-data-race-around-sysctl_fwmark_reflect.patch
+tcp-dccp-fix-a-data-race-around-sysctl_tcp_fwmark_ac.patch
+tcp-fix-data-races-around-sysctl_tcp_mtu_probing.patch
+tcp-fix-data-races-around-sysctl_tcp_base_mss.patch
+tcp-fix-data-races-around-sysctl_tcp_min_snd_mss.patch
+tcp-fix-a-data-race-around-sysctl_tcp_mtu_probe_floo.patch
+tcp-fix-a-data-race-around-sysctl_tcp_probe_threshol.patch
+tcp-fix-a-data-race-around-sysctl_tcp_probe_interval.patch
+i2c-cadence-change-large-transfer-count-reset-logic-.patch
+net-stmmac-fix-dma-queue-left-shift-overflow-issue.patch
+net-tls-fix-race-in-tls-device-down-flow.patch
+igmp-fix-data-races-around-sysctl_igmp_llm_reports.patch
+igmp-fix-a-data-race-around-sysctl_igmp_max_membersh.patch
+tcp-fix-data-races-around-sysctl_tcp_syncookies.patch
+tcp-fix-data-races-around-sysctl_tcp_reordering.patch
+tcp-fix-data-races-around-some-timeout-sysctl-knobs.patch
+tcp-fix-a-data-race-around-sysctl_tcp_notsent_lowat.patch
+tcp-fix-a-data-race-around-sysctl_tcp_tw_reuse.patch
+tcp-fix-data-races-around-sysctl_max_syn_backlog.patch
+tcp-fix-data-races-around-sysctl_tcp_fastopen.patch
+iavf-fix-handling-of-dummy-receive-descriptors.patch
+i40e-fix-erroneous-adapter-reinitialization-during-r.patch
+ixgbe-add-locking-to-prevent-panic-when-setting-srio.patch
+gpio-pca953x-only-use-single-read-write-for-no-ai-mo.patch
+be2net-fix-buffer-overflow-in-be_get_module_eeprom.patch
+ipv4-fix-a-data-race-around-sysctl_fib_multipath_use.patch
+udp-fix-a-data-race-around-sysctl_udp_l3mdev_accept.patch
+tcp-fix-data-races-around-sysctl-knobs-related-to-sy.patch
+tcp-fix-a-data-race-around-sysctl_tcp_early_retrans.patch
+tcp-fix-data-races-around-sysctl_tcp_recovery.patch
+tcp-fix-a-data-race-around-sysctl_tcp_thin_linear_ti.patch
+tcp-fix-data-races-around-sysctl_tcp_slow_start_afte.patch
+tcp-fix-a-data-race-around-sysctl_tcp_retrans_collap.patch
+tcp-fix-a-data-race-around-sysctl_tcp_stdurg.patch
+tcp-fix-a-data-race-around-sysctl_tcp_rfc1337.patch
+tcp-fix-data-races-around-sysctl_tcp_max_reordering.patch
diff --git a/queue-5.4/tcp-dccp-fix-a-data-race-around-sysctl_tcp_fwmark_ac.patch b/queue-5.4/tcp-dccp-fix-a-data-race-around-sysctl_tcp_fwmark_ac.patch
new file mode 100644 (file)
index 0000000..29642f2
--- /dev/null
@@ -0,0 +1,37 @@
+From 83c6bd319d33de4fa7195acf477c4b35185aaf39 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:51:58 -0700
+Subject: tcp/dccp: Fix a data-race around sysctl_tcp_fwmark_accept.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 1a0008f9df59451d0a17806c1ee1a19857032fa8 ]
+
+While reading sysctl_tcp_fwmark_accept, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 84f39b08d786 ("net: support marking accepting TCP sockets")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/inet_sock.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
+index 40f92f5a3047..58db7c69c146 100644
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
+ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
+ {
+-      if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
++      if (!sk->sk_mark &&
++          READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
+               return skb->mark;
+       return sk->sk_mark;
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_early_retrans.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_early_retrans.patch
new file mode 100644 (file)
index 0000000..07dd70c
--- /dev/null
@@ -0,0 +1,36 @@
+From e3e4365a029b60093e818e3bffa52f7c2cf7d470 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:45 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_early_retrans.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 52e65865deb6a36718a463030500f16530eaab74 ]
+
+While reading sysctl_tcp_early_retrans, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: eed530b6c676 ("tcp: early retransmit")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 5cc345c4006e..72ee1fca0501 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2509,7 +2509,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
+       if (rcu_access_pointer(tp->fastopen_rsk))
+               return false;
+-      early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
++      early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
+       /* Schedule a loss probe in 2*RTT for SACK capable connections
+        * not in loss recovery, that are either limited by cwnd or application.
+        */
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_mtu_probe_floo.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_mtu_probe_floo.patch
new file mode 100644 (file)
index 0000000..7f9f782
--- /dev/null
@@ -0,0 +1,36 @@
+From ce89233ec2c89f46ca14297d779be9fb205a7765 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:52:03 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_mtu_probe_floor.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 8e92d4423615a5257d0d871fc067aa561f597deb ]
+
+While reading sysctl_tcp_mtu_probe_floor, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: c04b79b6cfd7 ("tcp: add new tcp_mtu_probe_floor sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_timer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
+index 0460c5deee3f..c48aeaef3ec7 100644
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -172,7 +172,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
+       } else {
+               mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
+               mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
+-              mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
++              mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
+               mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
+               icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_notsent_lowat.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_notsent_lowat.patch
new file mode 100644 (file)
index 0000000..03934fe
--- /dev/null
@@ -0,0 +1,36 @@
+From 857b80af4b47175495fdfcaa67ad52f9d34c34aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:51 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_notsent_lowat.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 55be873695ed8912eb77ff46d1d1cadf028bd0f3 ]
+
+While reading sysctl_tcp_notsent_lowat, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: c9bee3b7fdec ("tcp: TCP_NOTSENT_LOWAT socket option")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 96dae0937998..eb984ec22f22 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1947,7 +1947,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
+ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
+ {
+       struct net *net = sock_net((struct sock *)tp);
+-      return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
++      return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+ }
+ /* @wake is one when sk_stream_write_space() calls us.
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_probe_interval.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_probe_interval.patch
new file mode 100644 (file)
index 0000000..dc654bd
--- /dev/null
@@ -0,0 +1,36 @@
+From 243a2354e4522b638ecdfd08c78f2f7f122991ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:52:05 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_probe_interval.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 2a85388f1d94a9f8b5a529118a2c5eaa0520d85c ]
+
+While reading sysctl_tcp_probe_interval, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 05cbc0db03e8 ("ipv4: Create probe timer for tcp PMTU as per RFC4821")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index e60cb69d00a4..9bfe6965b873 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2052,7 +2052,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
+       u32 interval;
+       s32 delta;
+-      interval = net->ipv4.sysctl_tcp_probe_interval;
++      interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
+       delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
+       if (unlikely(delta >= interval * HZ)) {
+               int mss = tcp_current_mss(sk);
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_probe_threshol.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_probe_threshol.patch
new file mode 100644 (file)
index 0000000..e691d7b
--- /dev/null
@@ -0,0 +1,36 @@
+From b66b2c98a5f4ac18680fc2265b990077df2b1e47 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:52:04 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_probe_threshold.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 92c0aa4175474483d6cf373314343d4e624e882a ]
+
+While reading sysctl_tcp_probe_threshold, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 6b58e0a5f32d ("ipv4: Use binary search to choose tcp PMTU probe_size")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 7c0b96319fc0..e60cb69d00a4 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2134,7 +2134,7 @@ static int tcp_mtu_probe(struct sock *sk)
+        * probing process by not resetting search range to its orignal.
+        */
+       if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
+-              interval < net->ipv4.sysctl_tcp_probe_threshold) {
++          interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
+               /* Check whether enough time has elaplased for
+                * another round of probing.
+                */
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_retrans_collap.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_retrans_collap.patch
new file mode 100644 (file)
index 0000000..1da5249
--- /dev/null
@@ -0,0 +1,36 @@
+From 00b3cf63d73ec59bd83f5a234031be05858a619b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:49 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_retrans_collapse.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 1a63cb91f0c2fcdeced6d6edee8d1d886583d139 ]
+
+While reading sysctl_tcp_retrans_collapse, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 5d9a1a498a18..97f29ece3800 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2871,7 +2871,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
+       struct sk_buff *skb = to, *tmp;
+       bool first = true;
+-      if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
++      if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
+               return;
+       if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+               return;
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_rfc1337.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_rfc1337.patch
new file mode 100644 (file)
index 0000000..2285c80
--- /dev/null
@@ -0,0 +1,36 @@
+From 16d469af13e0c6b36361ab381a9c245fe4a1b73e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:51 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_rfc1337.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 0b484c91911e758e53656d570de58c2ed81ec6f2 ]
+
+While reading sysctl_tcp_rfc1337, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_minisocks.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index 9b038cb0a43d..324f43fadb37 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -180,7 +180,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
+                        * Oh well... nobody has a sufficient solution to this
+                        * protocol bug yet.
+                        */
+-                      if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
++                      if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
+ kill:
+                               inet_twsk_deschedule_put(tw);
+                               return TCP_TW_SUCCESS;
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_stdurg.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_stdurg.patch
new file mode 100644 (file)
index 0000000..154e8be
--- /dev/null
@@ -0,0 +1,36 @@
+From 32c49dbccbf60bd69bb992cac40804f96c870b55 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:50 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_stdurg.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 4e08ed41cb1194009fc1a916a59ce3ed4afd77cd ]
+
+While reading sysctl_tcp_stdurg, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 2f57c365ebd5..f9884956aa13 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5356,7 +5356,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
+       struct tcp_sock *tp = tcp_sk(sk);
+       u32 ptr = ntohs(th->urg_ptr);
+-      if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
++      if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
+               ptr--;
+       ptr += ntohl(th->seq);
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_thin_linear_ti.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_thin_linear_ti.patch
new file mode 100644 (file)
index 0000000..4c8f08c
--- /dev/null
@@ -0,0 +1,36 @@
+From b6f4764904d9e736066cf187904ab91bd30b6d92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:47 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_thin_linear_timeouts.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 7c6f2a86ca590d5187a073d987e9599985fb1c7c ]
+
+While reading sysctl_tcp_thin_linear_timeouts, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 36e31b0af587 ("net: TCP thin linear timeouts")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_timer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
+index 26da44e196ed..a0107eb02ae4 100644
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -569,7 +569,7 @@ void tcp_retransmit_timer(struct sock *sk)
+        * linear-timeout retransmissions into a black hole
+        */
+       if (sk->sk_state == TCP_ESTABLISHED &&
+-          (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
++          (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
+           tcp_stream_is_thin(tp) &&
+           icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
+               icsk->icsk_backoff = 0;
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_tw_reuse.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_tw_reuse.patch
new file mode 100644 (file)
index 0000000..c5517f7
--- /dev/null
@@ -0,0 +1,39 @@
+From 627c0d8697102bc53b3e2315b26cce876cb0c25c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:52 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_tw_reuse.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit cbfc6495586a3f09f6f07d9fb3c7cafe807e3c55 ]
+
+While reading sysctl_tcp_tw_reuse, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_ipv4.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 72fe93ace7d7..b95e1a3487c8 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -105,10 +105,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
+ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
+ {
++      int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
+       const struct inet_timewait_sock *tw = inet_twsk(sktw);
+       const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
+       struct tcp_sock *tp = tcp_sk(sk);
+-      int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
+       if (reuse == 2) {
+               /* Still does not detect *everything* that goes through
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-some-timeout-sysctl-knobs.patch b/queue-5.4/tcp-fix-data-races-around-some-timeout-sysctl-knobs.patch
new file mode 100644 (file)
index 0000000..6551df3
--- /dev/null
@@ -0,0 +1,120 @@
+From 95c2ba376c7d25e168318c342c93d19223d83abe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:50 -0700
+Subject: tcp: Fix data-races around some timeout sysctl knobs.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 39e24435a776e9de5c6dd188836cf2523547804b ]
+
+While reading these sysctl knobs, they can be changed concurrently.
+Thus, we need to add READ_ONCE() to their readers.
+
+  - tcp_retries1
+  - tcp_retries2
+  - tcp_orphan_retries
+  - tcp_fin_timeout
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h     |  3 ++-
+ net/ipv4/tcp.c        |  2 +-
+ net/ipv4/tcp_output.c |  2 +-
+ net/ipv4/tcp_timer.c  | 10 +++++-----
+ 4 files changed, 9 insertions(+), 8 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 65be8bd1f0f4..96dae0937998 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1465,7 +1465,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
+ static inline int tcp_fin_time(const struct sock *sk)
+ {
+-      int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
++      int fin_timeout = tcp_sk(sk)->linger2 ? :
++              READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
+       const int rto = inet_csk(sk)->icsk_rto;
+       if (fin_timeout < (rto << 2) - (rto >> 1))
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 790246011fff..333d221e0717 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3466,7 +3466,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+       case TCP_LINGER2:
+               val = tp->linger2;
+               if (val >= 0)
+-                      val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
++                      val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
+               break;
+       case TCP_DEFER_ACCEPT:
+               val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 9bfe6965b873..8b602a202acb 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -3847,7 +3847,7 @@ void tcp_send_probe0(struct sock *sk)
+       icsk->icsk_probes_out++;
+       if (err <= 0) {
+-              if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
++              if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
+                       icsk->icsk_backoff++;
+               timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
+       } else {
+diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
+index c48aeaef3ec7..26da44e196ed 100644
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
+  */
+ static int tcp_orphan_retries(struct sock *sk, bool alive)
+ {
+-      int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
++      int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
+       /* We know from an ICMP that something is wrong. */
+       if (sk->sk_err_soft && !alive)
+@@ -245,7 +245,7 @@ static int tcp_write_timeout(struct sock *sk)
+               retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+               expired = icsk->icsk_retransmits >= retry_until;
+       } else {
+-              if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
++              if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
+                       /* Black hole detection */
+                       tcp_mtu_probing(icsk, sk);
+@@ -254,7 +254,7 @@ static int tcp_write_timeout(struct sock *sk)
+                       sk_rethink_txhash(sk);
+               }
+-              retry_until = net->ipv4.sysctl_tcp_retries2;
++              retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
+               if (sock_flag(sk, SOCK_DEAD)) {
+                       const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
+@@ -381,7 +381,7 @@ static void tcp_probe_timer(struct sock *sk)
+                msecs_to_jiffies(icsk->icsk_user_timeout))
+               goto abort;
+-      max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
++      max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
+       if (sock_flag(sk, SOCK_DEAD)) {
+               const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
+@@ -580,7 +580,7 @@ void tcp_retransmit_timer(struct sock *sk)
+       }
+       inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                 tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
+-      if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
++      if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
+               __sk_dst_reset(sk);
+ out:;
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl-knobs-related-to-sy.patch b/queue-5.4/tcp-fix-data-races-around-sysctl-knobs-related-to-sy.patch
new file mode 100644 (file)
index 0000000..de88cfc
--- /dev/null
@@ -0,0 +1,180 @@
+From 252b4cce3db79ae796b7efa89e05c13bdd7d9d4d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:44 -0700
+Subject: tcp: Fix data-races around sysctl knobs related to SYN option.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 3666f666e99600518ab20982af04a078bbdad277 ]
+
+While reading these knobs, they can be changed concurrently.
+Thus, we need to add READ_ONCE() to their readers.
+
+  - tcp_sack
+  - tcp_window_scaling
+  - tcp_timestamps
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/crypto/chelsio/chtls/chtls_cm.c |  6 +++---
+ net/core/secure_seq.c                   |  4 ++--
+ net/ipv4/syncookies.c                   |  6 +++---
+ net/ipv4/tcp_input.c                    |  6 +++---
+ net/ipv4/tcp_output.c                   | 10 +++++-----
+ 5 files changed, 16 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
+index 82b76df43ae5..3b79bcd03e7b 100644
+--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
++++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
+@@ -1103,8 +1103,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
+       csk->sndbuf = newsk->sk_sndbuf;
+       csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
+       RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
+-                                         sock_net(newsk)->
+-                                              ipv4.sysctl_tcp_window_scaling,
++                                         READ_ONCE(sock_net(newsk)->
++                                                   ipv4.sysctl_tcp_window_scaling),
+                                          tp->window_clamp);
+       neigh_release(n);
+       inet_inherit_port(&tcp_hashinfo, lsk, newsk);
+@@ -1235,7 +1235,7 @@ static void chtls_pass_accept_request(struct sock *sk,
+       chtls_set_req_addr(oreq, iph->daddr, iph->saddr);
+       ip_dsfield = ipv4_get_dsfield(iph);
+       if (req->tcpopt.wsf <= 14 &&
+-          sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
++          READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
+               inet_rsk(oreq)->wscale_ok = 1;
+               inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
+       }
+diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
+index a1867c65ac63..6d86506e315f 100644
+--- a/net/core/secure_seq.c
++++ b/net/core/secure_seq.c
+@@ -65,7 +65,7 @@ u32 secure_tcpv6_ts_off(const struct net *net,
+               .daddr = *(struct in6_addr *)daddr,
+       };
+-      if (net->ipv4.sysctl_tcp_timestamps != 1)
++      if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+               return 0;
+       ts_secret_init();
+@@ -121,7 +121,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
+ #ifdef CONFIG_INET
+ u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
+ {
+-      if (net->ipv4.sysctl_tcp_timestamps != 1)
++      if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+               return 0;
+       ts_secret_init();
+diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
+index f1cbf8911844..3f6c9514c7a9 100644
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -243,12 +243,12 @@ bool cookie_timestamp_decode(const struct net *net,
+               return true;
+       }
+-      if (!net->ipv4.sysctl_tcp_timestamps)
++      if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
+               return false;
+       tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
+-      if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
++      if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
+               return false;
+       if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
+@@ -257,7 +257,7 @@ bool cookie_timestamp_decode(const struct net *net,
+       tcp_opt->wscale_ok = 1;
+       tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
+-      return net->ipv4.sysctl_tcp_window_scaling != 0;
++      return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
+ }
+ EXPORT_SYMBOL(cookie_timestamp_decode);
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index c1f26603cd2c..28df6c3feb3f 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3906,7 +3906,7 @@ void tcp_parse_options(const struct net *net,
+                               break;
+                       case TCPOPT_WINDOW:
+                               if (opsize == TCPOLEN_WINDOW && th->syn &&
+-                                  !estab && net->ipv4.sysctl_tcp_window_scaling) {
++                                  !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
+                                       __u8 snd_wscale = *(__u8 *)ptr;
+                                       opt_rx->wscale_ok = 1;
+                                       if (snd_wscale > TCP_MAX_WSCALE) {
+@@ -3922,7 +3922,7 @@ void tcp_parse_options(const struct net *net,
+                       case TCPOPT_TIMESTAMP:
+                               if ((opsize == TCPOLEN_TIMESTAMP) &&
+                                   ((estab && opt_rx->tstamp_ok) ||
+-                                   (!estab && net->ipv4.sysctl_tcp_timestamps))) {
++                                   (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
+                                       opt_rx->saw_tstamp = 1;
+                                       opt_rx->rcv_tsval = get_unaligned_be32(ptr);
+                                       opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
+@@ -3930,7 +3930,7 @@ void tcp_parse_options(const struct net *net,
+                               break;
+                       case TCPOPT_SACK_PERM:
+                               if (opsize == TCPOLEN_SACK_PERM && th->syn &&
+-                                  !estab && net->ipv4.sysctl_tcp_sack) {
++                                  !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
+                                       opt_rx->sack_ok = TCP_SACK_SEEN;
+                                       tcp_sack_reset(opt_rx);
+                               }
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 8b602a202acb..5cc345c4006e 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -620,18 +620,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
+       opts->mss = tcp_advertise_mss(sk);
+       remaining -= TCPOLEN_MSS_ALIGNED;
+-      if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
++      if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
+               opts->options |= OPTION_TS;
+               opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
+               opts->tsecr = tp->rx_opt.ts_recent;
+               remaining -= TCPOLEN_TSTAMP_ALIGNED;
+       }
+-      if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
++      if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
+               opts->ws = tp->rx_opt.rcv_wscale;
+               opts->options |= OPTION_WSCALE;
+               remaining -= TCPOLEN_WSCALE_ALIGNED;
+       }
+-      if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
++      if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
+               opts->options |= OPTION_SACK_ADVERTISE;
+               if (unlikely(!(OPTION_TS & opts->options)))
+                       remaining -= TCPOLEN_SACKPERM_ALIGNED;
+@@ -3407,7 +3407,7 @@ static void tcp_connect_init(struct sock *sk)
+        * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
+        */
+       tp->tcp_header_len = sizeof(struct tcphdr);
+-      if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
+               tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
+ #ifdef CONFIG_TCP_MD5SIG
+@@ -3443,7 +3443,7 @@ static void tcp_connect_init(struct sock *sk)
+                                 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
+                                 &tp->rcv_wnd,
+                                 &tp->window_clamp,
+-                                sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
++                                READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
+                                 &rcv_wscale,
+                                 rcv_wnd);
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl_max_syn_backlog.patch b/queue-5.4/tcp-fix-data-races-around-sysctl_max_syn_backlog.patch
new file mode 100644 (file)
index 0000000..bae39c1
--- /dev/null
@@ -0,0 +1,42 @@
+From b200a27672b95b24ef34bf4f3e288e70065733a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:53 -0700
+Subject: tcp: Fix data-races around sysctl_max_syn_backlog.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 79539f34743d3e14cc1fa6577d326a82cc64d62f ]
+
+While reading sysctl_max_syn_backlog, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index fbdb5de29a97..c1f26603cd2c 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -6676,10 +6676,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
+               goto drop_and_free;
+       if (!want_cookie && !isn) {
++              int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
++
+               /* Kill the following clause, if you dislike this way. */
+               if (!syncookies &&
+-                  (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+-                   (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
++                  (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
++                   (max_syn_backlog >> 2)) &&
+                   !tcp_peer_is_proven(req, dst)) {
+                       /* Without syncookies last quarter of
+                        * backlog is filled with destinations,
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_base_mss.patch b/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_base_mss.patch
new file mode 100644 (file)
index 0000000..69d5f12
--- /dev/null
@@ -0,0 +1,50 @@
+From 9a54d8626f93bbc27040635df1601473bb96559a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:52:01 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_base_mss.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 88d78bc097cd8ebc6541e93316c9d9bf651b13e8 ]
+
+While reading sysctl_tcp_base_mss, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 5d424d5a674f ("[TCP]: MTU probing")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ net/ipv4/tcp_timer.c  | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 5ac81c4f076d..b84bedf2804a 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1540,7 +1540,7 @@ void tcp_mtup_init(struct sock *sk)
+       icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
+       icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
+                              icsk->icsk_af_ops->net_header_len;
+-      icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
++      icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss));
+       icsk->icsk_mtup.probe_size = 0;
+       if (icsk->icsk_mtup.enabled)
+               icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
+diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
+index 57fa707e9e98..0c3ee2aa244f 100644
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -171,7 +171,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
+               icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
+       } else {
+               mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
+-              mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
++              mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
+               mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
+               mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
+               icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_fastopen.patch b/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_fastopen.patch
new file mode 100644 (file)
index 0000000..22b8e48
--- /dev/null
@@ -0,0 +1,85 @@
+From 8e542e1ed1a022a260dfba3ded877822efb6aad6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:54 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_fastopen.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 5a54213318c43f4009ae158347aa6016e3b9b55a ]
+
+While reading sysctl_tcp_fastopen, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 2100c8d2d9db ("net-tcp: Fast Open base")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/af_inet.c      | 2 +-
+ net/ipv4/tcp.c          | 6 ++++--
+ net/ipv4/tcp_fastopen.c | 4 ++--
+ 3 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
+index 06153386776d..d61ca7be6eda 100644
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -219,7 +219,7 @@ int inet_listen(struct socket *sock, int backlog)
+                * because the socket was in TCP_LISTEN state previously but
+                * was shutdown() rather than close().
+                */
+-              tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
++              tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
+               if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+                   (tcp_fastopen & TFO_SERVER_ENABLE) &&
+                   !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 333d221e0717..4b31f6e9ec61 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1148,7 +1148,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+       struct sockaddr *uaddr = msg->msg_name;
+       int err, flags;
+-      if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
++      if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
++            TFO_CLIENT_ENABLE) ||
+           (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
+            uaddr->sa_family == AF_UNSPEC))
+               return -EOPNOTSUPP;
+@@ -3127,7 +3128,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+       case TCP_FASTOPEN_CONNECT:
+               if (val > 1 || val < 0) {
+                       err = -EINVAL;
+-              } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
++              } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
++                         TFO_CLIENT_ENABLE) {
+                       if (sk->sk_state == TCP_CLOSE)
+                               tp->fastopen_connect = val;
+                       else
+diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
+index a5ec77a5ad6f..21705b2ddaff 100644
+--- a/net/ipv4/tcp_fastopen.c
++++ b/net/ipv4/tcp_fastopen.c
+@@ -349,7 +349,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk,
+                                  const struct dst_entry *dst,
+                                  int flag)
+ {
+-      return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
++      return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
+              tcp_sk(sk)->fastopen_no_cookie ||
+              (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
+ }
+@@ -364,7 +364,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+                             const struct dst_entry *dst)
+ {
+       bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+-      int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
++      int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
+       struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+       struct sock *child;
+       int ret = 0;
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_max_reordering.patch b/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_max_reordering.patch
new file mode 100644 (file)
index 0000000..7f6f940
--- /dev/null
@@ -0,0 +1,45 @@
+From 41c99f44aa9caafc2c984a5baf91204f3fb97f51 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:53 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_max_reordering.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit a11e5b3e7a59fde1a90b0eaeaa82320495cf8cae ]
+
+While reading sysctl_tcp_max_reordering, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: dca145ffaa8d ("tcp: allow for bigger reordering level")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index f9884956aa13..c151c4dd4ae6 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -905,7 +905,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+                        tp->undo_marker ? tp->undo_retrans : 0);
+ #endif
+               tp->reordering = min_t(u32, (metric + mss - 1) / mss,
+-                                     sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
++                                     READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
+       }
+       /* This exciting event is worth to be remembered. 8) */
+@@ -1886,7 +1886,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+               return;
+       tp->reordering = min_t(u32, tp->packets_out + addend,
+-                             sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
++                             READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
+       tp->reord_seen++;
+       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_min_snd_mss.patch b/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_min_snd_mss.patch
new file mode 100644 (file)
index 0000000..155ef82
--- /dev/null
@@ -0,0 +1,51 @@
+From ad55c89b3ebeb830beaee3b36b4d275d92d13ed8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:52:02 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_min_snd_mss.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 78eb166cdefcc3221c8c7c1e2d514e91a2eb5014 ]
+
+While reading sysctl_tcp_min_snd_mss, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 5f3e2bf008c2 ("tcp: add tcp_min_snd_mss sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 3 ++-
+ net/ipv4/tcp_timer.c  | 2 +-
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index b84bedf2804a..7c0b96319fc0 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1494,7 +1494,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
+       mss_now -= icsk->icsk_ext_hdr_len;
+       /* Then reserve room for full set of TCP options and 8 bytes of data */
+-      mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
++      mss_now = max(mss_now,
++                    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss));
+       return mss_now;
+ }
+diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
+index 0c3ee2aa244f..0460c5deee3f 100644
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -173,7 +173,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
+               mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
+               mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
+               mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
+-              mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
++              mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
+               icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+       }
+       tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_mtu_probing.patch b/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_mtu_probing.patch
new file mode 100644 (file)
index 0000000..ac5f4f1
--- /dev/null
@@ -0,0 +1,50 @@
+From ce744070e47a82f3e9057d1e75c2607603a57d57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 13:52:00 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_mtu_probing.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit f47d00e077e7d61baf69e46dde3210c886360207 ]
+
+While reading sysctl_tcp_mtu_probing, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 5d424d5a674f ("[TCP]: MTU probing")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ net/ipv4/tcp_timer.c  | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 739fc69cdcc6..5ac81c4f076d 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1537,7 +1537,7 @@ void tcp_mtup_init(struct sock *sk)
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct net *net = sock_net(sk);
+-      icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
++      icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
+       icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
+                              icsk->icsk_af_ops->net_header_len;
+       icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
+diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
+index fa2ae96ecdc4..57fa707e9e98 100644
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
+       int mss;
+       /* Black hole detection */
+-      if (!net->ipv4.sysctl_tcp_mtu_probing)
++      if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
+               return;
+       if (!icsk->icsk_mtup.enabled) {
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_recovery.patch b/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_recovery.patch
new file mode 100644 (file)
index 0000000..eba448b
--- /dev/null
@@ -0,0 +1,62 @@
+From f2112ea892b24e60c9b602e9a2181f3b8db1f863 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:46 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_recovery.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit e7d2ef837e14a971a05f60ea08c47f3fed1a36e4 ]
+
+While reading sysctl_tcp_recovery, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 4f41b1c58a32 ("tcp: use RACK to detect losses")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c    | 3 ++-
+ net/ipv4/tcp_recovery.c | 6 ++++--
+ 2 files changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 28df6c3feb3f..2f57c365ebd5 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1950,7 +1950,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
+ static bool tcp_is_rack(const struct sock *sk)
+ {
+-      return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
++      return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
++              TCP_RACK_LOSS_DETECTION;
+ }
+ /* If we detect SACK reneging, forget all SACK information
+diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
+index 8757bb6cb1d9..22ec8dcc1428 100644
+--- a/net/ipv4/tcp_recovery.c
++++ b/net/ipv4/tcp_recovery.c
+@@ -33,7 +33,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
+                       return 0;
+               if (tp->sacked_out >= tp->reordering &&
+-                  !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
++                  !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
++                    TCP_RACK_NO_DUPTHRESH))
+                       return 0;
+       }
+@@ -204,7 +205,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+-      if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
++      if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
++           TCP_RACK_STATIC_REO_WND) ||
+           !rs->prior_delivered)
+               return;
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_reordering.patch b/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_reordering.patch
new file mode 100644 (file)
index 0000000..ffc079e
--- /dev/null
@@ -0,0 +1,89 @@
+From 80e12f18946a8e2b02884568789f74825c96816d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:49 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_reordering.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 46778cd16e6a5ad1b2e3a91f6c057c907379418e ]
+
+While reading sysctl_tcp_reordering, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c         |  2 +-
+ net/ipv4/tcp_input.c   | 10 +++++++---
+ net/ipv4/tcp_metrics.c |  3 ++-
+ 3 files changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 4815cf72569e..790246011fff 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -437,7 +437,7 @@ void tcp_init_sock(struct sock *sk)
+       tp->snd_cwnd_clamp = ~0;
+       tp->mss_cache = TCP_MSS_DEFAULT;
+-      tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
++      tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
+       tcp_assign_congestion_control(sk);
+       tp->tsoffset = 0;
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 85204903b2fa..fbdb5de29a97 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1994,6 +1994,7 @@ void tcp_enter_loss(struct sock *sk)
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct net *net = sock_net(sk);
+       bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
++      u8 reordering;
+       tcp_timeout_mark_lost(sk);
+@@ -2014,10 +2015,12 @@ void tcp_enter_loss(struct sock *sk)
+       /* Timeout in disordered state after receiving substantial DUPACKs
+        * suggests that the degree of reordering is over-estimated.
+        */
++      reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
+       if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
+-          tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
++          tp->sacked_out >= reordering)
+               tp->reordering = min_t(unsigned int, tp->reordering,
+-                                     net->ipv4.sysctl_tcp_reordering);
++                                     reordering);
++
+       tcp_set_ca_state(sk, TCP_CA_Loss);
+       tp->high_seq = tp->snd_nxt;
+       tcp_ecn_queue_cwr(tp);
+@@ -3319,7 +3322,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
+        * new SACK or ECE mark may first advance cwnd here and later reduce
+        * cwnd in tcp_fastretrans_alert() based on more states.
+        */
+-      if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
++      if (tcp_sk(sk)->reordering >
++          READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
+               return flag & FLAG_FORWARD_PROGRESS;
+       return flag & FLAG_DATA_ACKED;
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index c4848e7a0aad..9a7d8a599857 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -425,7 +425,8 @@ void tcp_update_metrics(struct sock *sk)
+               if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
+                       val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
+                       if (val < tp->reordering &&
+-                          tp->reordering != net->ipv4.sysctl_tcp_reordering)
++                          tp->reordering !=
++                          READ_ONCE(net->ipv4.sysctl_tcp_reordering))
+                               tcp_metric_set(tm, TCP_METRIC_REORDERING,
+                                              tp->reordering);
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_slow_start_afte.patch b/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_slow_start_afte.patch
new file mode 100644 (file)
index 0000000..5e385a3
--- /dev/null
@@ -0,0 +1,52 @@
+From da75332bb257b6883ce7d2e06cb33b71b4db1188 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:48 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_slow_start_after_idle.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 4845b5713ab18a1bb6e31d1fbb4d600240b8b691 ]
+
+While reading sysctl_tcp_slow_start_after_idle, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 35089bb203f4 ("[TCP]: Add tcp_slow_start_after_idle sysctl.")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h     | 4 ++--
+ net/ipv4/tcp_output.c | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index eb984ec22f22..aaf1d5d5a13b 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1373,8 +1373,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
+       struct tcp_sock *tp = tcp_sk(sk);
+       s32 delta;
+-      if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
+-          ca_ops->cong_control)
++      if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
++          tp->packets_out || ca_ops->cong_control)
+               return;
+       delta = tcp_jiffies32 - tp->lsndtime;
+       if (delta > inet_csk(sk)->icsk_rto)
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 72ee1fca0501..5d9a1a498a18 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1673,7 +1673,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
+               if (tp->packets_out > tp->snd_cwnd_used)
+                       tp->snd_cwnd_used = tp->packets_out;
+-              if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
++              if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
+                   (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
+                   !ca_ops->cong_control)
+                       tcp_cwnd_application_limited(sk);
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_syncookies.patch b/queue-5.4/tcp-fix-data-races-around-sysctl_tcp_syncookies.patch
new file mode 100644 (file)
index 0000000..0830e88
--- /dev/null
@@ -0,0 +1,143 @@
+From 5bcefd7ec48639f44b7e07f8e9bbe486342d5786 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:47 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_syncookies.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit f2e383b5bb6bbc60a0b94b87b3e49a2b1aefd11e ]
+
+While reading sysctl_tcp_syncookies, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/filter.c     |  4 ++--
+ net/ipv4/syncookies.c |  3 ++-
+ net/ipv4/tcp_input.c  | 20 ++++++++++++--------
+ net/ipv6/syncookies.c |  3 ++-
+ 4 files changed, 18 insertions(+), 12 deletions(-)
+
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 75f53b5e6389..72bf78032f45 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -5839,7 +5839,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
+       if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+               return -EINVAL;
+-      if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
++      if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
+               return -EINVAL;
+       if (!th->ack || th->rst || th->syn)
+@@ -5914,7 +5914,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+       if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+               return -EINVAL;
+-      if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
++      if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
+               return -ENOENT;
+       if (!th->syn || th->ack || th->fin || th->rst)
+diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
+index 6811174ad518..f1cbf8911844 100644
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -297,7 +297,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
+       struct flowi4 fl4;
+       u32 tsoff = 0;
+-      if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
++      if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
++          !th->ack || th->rst)
+               goto out;
+       if (tcp_synq_no_recent_overflow(sk))
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 0808110451a0..85204903b2fa 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -6530,11 +6530,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
+ {
+       struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+       const char *msg = "Dropping request";
+-      bool want_cookie = false;
+       struct net *net = sock_net(sk);
++      bool want_cookie = false;
++      u8 syncookies;
++
++      syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
+ #ifdef CONFIG_SYN_COOKIES
+-      if (net->ipv4.sysctl_tcp_syncookies) {
++      if (syncookies) {
+               msg = "Sending cookies";
+               want_cookie = true;
+               __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+@@ -6542,8 +6545,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
+ #endif
+               __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
+-      if (!queue->synflood_warned &&
+-          net->ipv4.sysctl_tcp_syncookies != 2 &&
++      if (!queue->synflood_warned && syncookies != 2 &&
+           xchg(&queue->synflood_warned, 1) == 0)
+               net_info_ratelimited("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
+                                    proto, sk->sk_num, msg);
+@@ -6578,7 +6580,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+       struct tcp_sock *tp = tcp_sk(sk);
+       u16 mss;
+-      if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
+           !inet_csk_reqsk_queue_is_full(sk))
+               return 0;
+@@ -6612,13 +6614,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
+       bool want_cookie = false;
+       struct dst_entry *dst;
+       struct flowi fl;
++      u8 syncookies;
++
++      syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
+       /* TW buckets are converted to open requests without
+        * limitations, they conserve resources and peer is
+        * evidently real one.
+        */
+-      if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
+-           inet_csk_reqsk_queue_is_full(sk)) && !isn) {
++      if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+               want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
+               if (!want_cookie)
+                       goto drop;
+@@ -6669,7 +6673,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
+       if (!want_cookie && !isn) {
+               /* Kill the following clause, if you dislike this way. */
+-              if (!net->ipv4.sysctl_tcp_syncookies &&
++              if (!syncookies &&
+                   (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+                    (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+                   !tcp_peer_is_proven(req, dst)) {
+diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
+index 37ab254f7b92..7e5550546594 100644
+--- a/net/ipv6/syncookies.c
++++ b/net/ipv6/syncookies.c
+@@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+       __u8 rcv_wscale;
+       u32 tsoff = 0;
+-      if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
++      if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
++          !th->ack || th->rst)
+               goto out;
+       if (tcp_synq_no_recent_overflow(sk))
+-- 
+2.35.1
+
diff --git a/queue-5.4/udp-fix-a-data-race-around-sysctl_udp_l3mdev_accept.patch b/queue-5.4/udp-fix-a-data-race-around-sysctl_udp_l3mdev_accept.patch
new file mode 100644 (file)
index 0000000..e86c474
--- /dev/null
@@ -0,0 +1,36 @@
+From aa536ed11ad542a8feecad6cb55e7556d2cc7c2d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 10:26:43 -0700
+Subject: udp: Fix a data-race around sysctl_udp_l3mdev_accept.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 3d72bb4188c708bb16758c60822fc4dda7a95174 ]
+
+While reading sysctl_udp_l3mdev_accept, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 63a6fff353d0 ("net: Avoid receiving packets with an l3mdev on unbound UDP sockets")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/udp.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/net/udp.h b/include/net/udp.h
+index 9787a42f7ed3..e66854e767dc 100644
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -252,7 +252,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+                                      int dif, int sdif)
+ {
+ #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+-      return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
++      return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
+                                bound_dev_if, dif, sdif);
+ #else
+       return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
+-- 
+2.35.1
+
diff --git a/queue-5.4/xfrm-xfrm_policy-fix-a-possible-double-xfrm_pols_put.patch b/queue-5.4/xfrm-xfrm_policy-fix-a-possible-double-xfrm_pols_put.patch
new file mode 100644 (file)
index 0000000..c0b93a1
--- /dev/null
@@ -0,0 +1,58 @@
+From 4fed3a6e64b879e87b378eaab3e5b996aa11edac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Jun 2022 14:46:25 +0800
+Subject: xfrm: xfrm_policy: fix a possible double xfrm_pols_put() in
+ xfrm_bundle_lookup()
+
+From: Hangyu Hua <hbh25y@gmail.com>
+
+[ Upstream commit f85daf0e725358be78dfd208dea5fd665d8cb901 ]
+
+xfrm_policy_lookup() will call xfrm_pol_hold_rcu() to get a refcount of
+pols[0]. This refcount can be dropped in xfrm_expand_policies() when
+xfrm_expand_policies() returns an error. pols[0]'s refcount is balanced
+there. But xfrm_bundle_lookup() will also call xfrm_pols_put() with
+num_pols == 1 to drop this refcount when xfrm_expand_policies() returns
+an error.
+
+This patch also fixes an illegal address access. pols[0] will hold an error
+pointer when xfrm_policy_lookup() fails. This leads xfrm_pols_put() to
+dereference an illegal address in xfrm_bundle_lookup()'s error path.
+
+Fix these by setting num_pols = 0 in xfrm_expand_policies()'s error path.
+
+Fixes: 80c802f3073e ("xfrm: cache bundles instead of policies for outgoing flows")
+Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_policy.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 3ecb77c58c44..28a8cdef8e51 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -2679,8 +2679,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
+               *num_xfrms = 0;
+               return 0;
+       }
+-      if (IS_ERR(pols[0]))
++      if (IS_ERR(pols[0])) {
++              *num_pols = 0;
+               return PTR_ERR(pols[0]);
++      }
+       *num_xfrms = pols[0]->xfrm_nr;
+@@ -2695,6 +2697,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
+               if (pols[1]) {
+                       if (IS_ERR(pols[1])) {
+                               xfrm_pols_put(pols, *num_pols);
++                              *num_pols = 0;
+                               return PTR_ERR(pols[1]);
+                       }
+                       (*num_pols)++;
+-- 
+2.35.1
+