--- /dev/null
+From b12764695c3fcade145890b67f82f8b139174cc7 Mon Sep 17 00:00:00 2001
+From: Aaro Koskinen <aaro.koskinen@iki.fi>
+Date: Sat, 27 Nov 2021 21:42:14 +0200
+Subject: i2c: cbus-gpio: set atomic transfer callback
+
+From: Aaro Koskinen <aaro.koskinen@iki.fi>
+
+commit b12764695c3fcade145890b67f82f8b139174cc7 upstream.
+
+CBUS transfers have always been atomic, but after commit 63b96983a5dd
+("i2c: core: introduce callbacks for atomic transfers") we started to see
+warnings during e.g. poweroff as the atomic callback is not explicitly set.
+Fix that.
+
+Fixes the following WARNING seen during Nokia N810 power down:
+
+[ 786.570617] reboot: Power down
+[ 786.573913] ------------[ cut here ]------------
+[ 786.578826] WARNING: CPU: 0 PID: 672 at drivers/i2c/i2c-core.h:40 i2c_smbus_xfer+0x100/0x110
+[ 786.587799] No atomic I2C transfer handler for 'i2c-2'
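+
+The warning comes from the i2c core's bus-lock helper, which demands an
+atomic handler whenever a transfer happens late in system shutdown. A
+paraphrased sketch of that check (drivers/i2c/i2c-core.h; exact code
+may differ between kernel versions):
+
+  static inline int __i2c_lock_bus_helper(struct i2c_adapter *adap)
+  {
+          int ret = 0;
+
+          if (i2c_in_atomic_xfer_mode()) {
+                  WARN(!adap->algo->master_xfer_atomic &&
+                       !adap->algo->smbus_xfer_atomic,
+                       "No atomic I2C transfer handler for '%s'\n",
+                       dev_name(&adap->dev));
+                  ret = i2c_trylock_bus(adap, I2C_LOCK_SEGMENT) ? 0 : -EAGAIN;
+          } else {
+                  i2c_lock_bus(adap, I2C_LOCK_SEGMENT);
+          }
+
+          return ret;
+  }
+
+Since CBUS transfers have always been atomic, the existing
+cbus_i2c_smbus_xfer() can simply be reused as the atomic callback, as
+the diff below does.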
+
+Fixes: 63b96983a5dd ("i2c: core: introduce callbacks for atomic transfers")
+Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-cbus-gpio.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-cbus-gpio.c
++++ b/drivers/i2c/busses/i2c-cbus-gpio.c
+@@ -195,8 +195,9 @@ static u32 cbus_i2c_func(struct i2c_adap
+ }
+
+ static const struct i2c_algorithm cbus_i2c_algo = {
+- .smbus_xfer = cbus_i2c_smbus_xfer,
+- .functionality = cbus_i2c_func,
++ .smbus_xfer = cbus_i2c_smbus_xfer,
++ .smbus_xfer_atomic = cbus_i2c_smbus_xfer,
++ .functionality = cbus_i2c_func,
+ };
+
+ static int cbus_i2c_remove(struct platform_device *pdev)
--- /dev/null
+From 0c21d02ca469574d2082379db52d1a27b99eed0c Mon Sep 17 00:00:00 2001
+From: Alain Volmat <alain.volmat@foss.st.com>
+Date: Mon, 20 Sep 2021 17:21:29 +0200
+Subject: i2c: stm32f7: flush TX FIFO upon transfer errors
+
+From: Alain Volmat <alain.volmat@foss.st.com>
+
+commit 0c21d02ca469574d2082379db52d1a27b99eed0c upstream.
+
+While handling an error during a transfer (e.g. a NACK), it can
+happen that the driver has already written data into TXDR before
+the transfer gets stopped.
+This commit adds a TXDR flush after the end of the transfer in case
+of error, to avoid sending stale data to another slave upon the next
+transfer.
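+
+TXDR cannot be emptied by a register read on this controller; per the
+STM32F7 reference manual, software flushes it by writing 1 to the TXE
+flag in the ISR register, which marks the transmit data register as
+empty and discards any stale byte. A minimal sketch (hypothetical
+helper name; the patch below open-codes the write):
+
+  static void stm32f7_i2c_flush_txdr(struct stm32f7_i2c_dev *i2c_dev)
+  {
+          /* Setting TXE flushes any pending byte out of TXDR */
+          writel_relaxed(STM32F7_I2C_ISR_TXE,
+                         i2c_dev->base + STM32F7_I2C_ISR);
+  }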
+
+Fixes: aeb068c57214 ("i2c: i2c-stm32f7: add driver")
+Signed-off-by: Alain Volmat <alain.volmat@foss.st.com>
+Reviewed-by: Pierre-Yves MORDRET <pierre-yves.mordret@foss.st.com>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-stm32f7.c | 20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+--- a/drivers/i2c/busses/i2c-stm32f7.c
++++ b/drivers/i2c/busses/i2c-stm32f7.c
+@@ -1665,6 +1665,16 @@ static int stm32f7_i2c_xfer(struct i2c_a
+ time_left = wait_for_completion_timeout(&i2c_dev->complete,
+ i2c_dev->adap.timeout);
+ ret = f7_msg->result;
++ if (ret) {
++ /*
++ * It is possible that some unsent data have already been
++ * written into TXDR. To avoid sending old data in a
++ * further transfer, flush TXDR in case of any error
++ */
++ writel_relaxed(STM32F7_I2C_ISR_TXE,
++ i2c_dev->base + STM32F7_I2C_ISR);
++ goto pm_free;
++ }
+
+ if (!time_left) {
+ dev_dbg(i2c_dev->dev, "Access to slave 0x%x timed out\n",
+@@ -1713,8 +1723,16 @@ static int stm32f7_i2c_smbus_xfer(struct
+ timeout = wait_for_completion_timeout(&i2c_dev->complete,
+ i2c_dev->adap.timeout);
+ ret = f7_msg->result;
+- if (ret)
++ if (ret) {
++ /*
++ * It is possible that some unsent data have already been
++ * written into TXDR. To avoid sending old data in a
++ * further transfer, flush TXDR in case of any error
++ */
++ writel_relaxed(STM32F7_I2C_ISR_TXE,
++ i2c_dev->base + STM32F7_I2C_ISR);
+ goto pm_free;
++ }
+
+ if (!timeout) {
+ dev_dbg(dev, "Access to slave 0x%x timed out\n", f7_msg->addr);
--- /dev/null
+From b933d1faf8fa30d16171bcff404e39c41b2a7c84 Mon Sep 17 00:00:00 2001
+From: Alain Volmat <alain.volmat@foss.st.com>
+Date: Mon, 20 Sep 2021 17:21:30 +0200
+Subject: i2c: stm32f7: recover the bus on access timeout
+
+From: Alain Volmat <alain.volmat@foss.st.com>
+
+commit b933d1faf8fa30d16171bcff404e39c41b2a7c84 upstream.
+
+When getting an access timeout, ensure that the bus is in a proper
+state prior to returning the error.
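+
+stm32f7_i2c_wait_free_bus() polls the BUSY flag and, if the bus stays
+stuck, falls back to the driver's recovery path. A rough sketch of that
+logic (simplified; recover_bus() stands in for the driver's actual
+recovery helper):
+
+  static int stm32f7_i2c_wait_free_bus(struct stm32f7_i2c_dev *i2c_dev)
+  {
+          u32 status;
+          int ret;
+
+          /* Wait for the BUSY flag in ISR to clear */
+          ret = readl_relaxed_poll_timeout(i2c_dev->base + STM32F7_I2C_ISR,
+                                           status,
+                                           !(status & STM32F7_I2C_ISR_BUSY),
+                                           10, 1000);
+          if (!ret)
+                  return 0;
+
+          /* Bus still busy: attempt recovery before giving up */
+          return recover_bus(i2c_dev);
+  }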
+
+Fixes: aeb068c57214 ("i2c: i2c-stm32f7: add driver")
+Signed-off-by: Alain Volmat <alain.volmat@foss.st.com>
+Reviewed-by: Pierre-Yves MORDRET <pierre-yves.mordret@foss.st.com>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-stm32f7.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/i2c/busses/i2c-stm32f7.c
++++ b/drivers/i2c/busses/i2c-stm32f7.c
+@@ -1681,6 +1681,7 @@ static int stm32f7_i2c_xfer(struct i2c_a
+ i2c_dev->msg->addr);
+ if (i2c_dev->use_dma)
+ dmaengine_terminate_all(dma->chan_using);
++ stm32f7_i2c_wait_free_bus(i2c_dev);
+ ret = -ETIMEDOUT;
+ }
+
+@@ -1738,6 +1739,7 @@ static int stm32f7_i2c_smbus_xfer(struct
+ dev_dbg(dev, "Access to slave 0x%x timed out\n", f7_msg->addr);
+ if (i2c_dev->use_dma)
+ dmaengine_terminate_all(dma->chan_using);
++ stm32f7_i2c_wait_free_bus(i2c_dev);
+ ret = -ETIMEDOUT;
+ goto pm_free;
+ }
--- /dev/null
+From 31b90a95ccbbb4b628578ac17e3b3cc8eeacfe31 Mon Sep 17 00:00:00 2001
+From: Alain Volmat <alain.volmat@foss.st.com>
+Date: Mon, 20 Sep 2021 17:21:31 +0200
+Subject: i2c: stm32f7: stop dma transfer in case of NACK
+
+From: Alain Volmat <alain.volmat@foss.st.com>
+
+commit 31b90a95ccbbb4b628578ac17e3b3cc8eeacfe31 upstream.
+
+When a NACK is received, the DMA transfer should be stopped to
+avoid feeding further data into the FIFO.
+Also ensure that the proper error code is returned and that we do
+not wait for the DMA completion when an error occurs during the
+transmission.
+
+Fixes: 7ecc8cfde553 ("i2c: i2c-stm32f7: Add DMA support")
+Signed-off-by: Alain Volmat <alain.volmat@foss.st.com>
+Reviewed-by: Pierre-Yves MORDRET <pierre-yves.mordret@foss.st.com>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-stm32f7.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-stm32f7.c
++++ b/drivers/i2c/busses/i2c-stm32f7.c
+@@ -1472,6 +1472,7 @@ static irqreturn_t stm32f7_i2c_isr_event
+ {
+ struct stm32f7_i2c_dev *i2c_dev = data;
+ struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg;
++ struct stm32_i2c_dma *dma = i2c_dev->dma;
+ void __iomem *base = i2c_dev->base;
+ u32 status, mask;
+ int ret = IRQ_HANDLED;
+@@ -1497,6 +1498,10 @@ static irqreturn_t stm32f7_i2c_isr_event
+ dev_dbg(i2c_dev->dev, "<%s>: Receive NACK (addr %x)\n",
+ __func__, f7_msg->addr);
+ writel_relaxed(STM32F7_I2C_ICR_NACKCF, base + STM32F7_I2C_ICR);
++ if (i2c_dev->use_dma) {
++ stm32f7_i2c_disable_dma_req(i2c_dev);
++ dmaengine_terminate_all(dma->chan_using);
++ }
+ f7_msg->result = -ENXIO;
+ }
+
+@@ -1512,7 +1517,7 @@ static irqreturn_t stm32f7_i2c_isr_event
+ /* Clear STOP flag */
+ writel_relaxed(STM32F7_I2C_ICR_STOPCF, base + STM32F7_I2C_ICR);
+
+- if (i2c_dev->use_dma) {
++ if (i2c_dev->use_dma && !f7_msg->result) {
+ ret = IRQ_WAKE_THREAD;
+ } else {
+ i2c_dev->master_mode = false;
+@@ -1525,7 +1530,7 @@ static irqreturn_t stm32f7_i2c_isr_event
+ if (f7_msg->stop) {
+ mask = STM32F7_I2C_CR2_STOP;
+ stm32f7_i2c_set_bits(base + STM32F7_I2C_CR2, mask);
+- } else if (i2c_dev->use_dma) {
++ } else if (i2c_dev->use_dma && !f7_msg->result) {
+ ret = IRQ_WAKE_THREAD;
+ } else if (f7_msg->smbus) {
+ stm32f7_i2c_smbus_rep_start(i2c_dev);
--- /dev/null
+From b0f38e15979fa8851e88e8aa371367f264e7b6e9 Mon Sep 17 00:00:00 2001
+From: Randy Dunlap <rdunlap@infradead.org>
+Date: Mon, 29 Nov 2021 22:39:47 -0800
+Subject: natsemi: xtensa: fix section mismatch warnings
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+commit b0f38e15979fa8851e88e8aa371367f264e7b6e9 upstream.
+
+Fix section mismatch warnings in xtsonic. The first one appears to be
+bogus and after fixing the second one, the first one is gone.
+
+WARNING: modpost: vmlinux.o(.text+0x529adc): Section mismatch in reference from the function sonic_get_stats() to the function .init.text:set_reset_devices()
+The function sonic_get_stats() references
+the function __init set_reset_devices().
+This is often because sonic_get_stats lacks a __init
+annotation or the annotation of set_reset_devices is wrong.
+
+WARNING: modpost: vmlinux.o(.text+0x529b3b): Section mismatch in reference from the function xtsonic_probe() to the function .init.text:sonic_probe1()
+The function xtsonic_probe() references
+the function __init sonic_probe1().
+This is often because xtsonic_probe lacks a __init
+annotation or the annotation of sonic_probe1 is wrong.
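+
+The underlying rule: __init functions live in .init.text, which the
+kernel frees once boot completes, so they must never be reachable from
+code that can run later. A minimal illustration (not from this driver):
+
+  static int __init one_shot_setup(void)     /* discarded after boot */
+  {
+          return 0;
+  }
+
+  static int runtime_op(struct net_device *dev)
+  {
+          return one_shot_setup();   /* modpost: section mismatch */
+  }
+
+Since xtsonic_probe() is not __init, sonic_probe1() must not be __init
+either; dropping the annotation fixes both warnings.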
+
+Fixes: 74f2a5f0ef64 ("xtensa: Add support for the Sonic Ethernet device for the XT2000 board.")
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Reported-by: kernel test robot <lkp@intel.com>
+Cc: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Cc: Finn Thain <fthain@telegraphics.com.au>
+Cc: Chris Zankel <chris@zankel.net>
+Cc: linux-xtensa@linux-xtensa.org
+Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Acked-by: Max Filippov <jcmvbkbc@gmail.com>
+Link: https://lore.kernel.org/r/20211130063947.7529-1-rdunlap@infradead.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/natsemi/xtsonic.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/natsemi/xtsonic.c
++++ b/drivers/net/ethernet/natsemi/xtsonic.c
+@@ -120,7 +120,7 @@ static const struct net_device_ops xtson
+ .ndo_set_mac_address = eth_mac_addr,
+ };
+
+-static int __init sonic_probe1(struct net_device *dev)
++static int sonic_probe1(struct net_device *dev)
+ {
+ unsigned int silicon_revision;
+ struct sonic_local *lp = netdev_priv(dev);
--- /dev/null
+From 7d4741eacdefa5f0475431645b56baf00784df1f Mon Sep 17 00:00:00 2001
+From: Benjamin Poirier <bpoirier@nvidia.com>
+Date: Mon, 29 Nov 2021 15:15:05 +0900
+Subject: net: mpls: Fix notifications when deleting a device
+
+From: Benjamin Poirier <bpoirier@nvidia.com>
+
+commit 7d4741eacdefa5f0475431645b56baf00784df1f upstream.
+
+There are various problems related to netlink notifications for mpls route
+changes in response to interfaces being deleted:
+* delete interface of only nexthop
+ DELROUTE notification is missing RTA_OIF attribute
+* delete interface of non-last nexthop
+ NEWROUTE notification is missing entirely
+* delete interface of last nexthop
+ DELROUTE notification is missing nexthop
+
+All of these problems stem from the fact that existing routes are modified
+in-place before sending a notification. Restructure mpls_ifdown() to avoid
+changing the route in the DELROUTE cases and to create a copy in the
+NEWROUTE case.
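+
+The NEWROUTE case now follows the usual copy-then-publish pattern for
+RCU-protected data: duplicate the route, modify the copy, and only then
+install it (which also generates the notification). In outline
+(condensed from the patch below):
+
+  size_t size = sizeof(*rt) + rt->rt_nhn * rt->rt_nh_size;
+  struct mpls_route *copy = kmalloc(size, GFP_KERNEL);
+
+  if (!copy)
+          return -ENOMEM;
+  memcpy(copy, rt, size);
+  /* ... update nexthop flags on the copy, never on the live route ... */
+  mpls_route_update(net, index, copy, NULL);  /* publish + notify */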
+
+Fixes: f8efb73c97e2 ("mpls: multipath route support")
+Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mpls/af_mpls.c | 68 ++++++++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 52 insertions(+), 16 deletions(-)
+
+--- a/net/mpls/af_mpls.c
++++ b/net/mpls/af_mpls.c
+@@ -1490,22 +1490,52 @@ static void mpls_dev_destroy_rcu(struct
+ kfree(mdev);
+ }
+
+-static void mpls_ifdown(struct net_device *dev, int event)
++static int mpls_ifdown(struct net_device *dev, int event)
+ {
+ struct mpls_route __rcu **platform_label;
+ struct net *net = dev_net(dev);
+- u8 alive, deleted;
+ unsigned index;
+
+ platform_label = rtnl_dereference(net->mpls.platform_label);
+ for (index = 0; index < net->mpls.platform_labels; index++) {
+ struct mpls_route *rt = rtnl_dereference(platform_label[index]);
++ bool nh_del = false;
++ u8 alive = 0;
+
+ if (!rt)
+ continue;
+
+- alive = 0;
+- deleted = 0;
++ if (event == NETDEV_UNREGISTER) {
++ u8 deleted = 0;
++
++ for_nexthops(rt) {
++ struct net_device *nh_dev =
++ rtnl_dereference(nh->nh_dev);
++
++ if (!nh_dev || nh_dev == dev)
++ deleted++;
++ if (nh_dev == dev)
++ nh_del = true;
++ } endfor_nexthops(rt);
++
++ /* if there are no more nexthops, delete the route */
++ if (deleted == rt->rt_nhn) {
++ mpls_route_update(net, index, NULL, NULL);
++ continue;
++ }
++
++ if (nh_del) {
++ size_t size = sizeof(*rt) + rt->rt_nhn *
++ rt->rt_nh_size;
++ struct mpls_route *orig = rt;
++
++ rt = kmalloc(size, GFP_KERNEL);
++ if (!rt)
++ return -ENOMEM;
++ memcpy(rt, orig, size);
++ }
++ }
++
+ change_nexthops(rt) {
+ unsigned int nh_flags = nh->nh_flags;
+
+@@ -1529,16 +1559,15 @@ static void mpls_ifdown(struct net_devic
+ next:
+ if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
+ alive++;
+- if (!rtnl_dereference(nh->nh_dev))
+- deleted++;
+ } endfor_nexthops(rt);
+
+ WRITE_ONCE(rt->rt_nhn_alive, alive);
+
+- /* if there are no more nexthops, delete the route */
+- if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn)
+- mpls_route_update(net, index, NULL, NULL);
++ if (nh_del)
++ mpls_route_update(net, index, rt, NULL);
+ }
++
++ return 0;
+ }
+
+ static void mpls_ifup(struct net_device *dev, unsigned int flags)
+@@ -1596,8 +1625,12 @@ static int mpls_dev_notify(struct notifi
+ return NOTIFY_OK;
+
+ switch (event) {
++ int err;
++
+ case NETDEV_DOWN:
+- mpls_ifdown(dev, event);
++ err = mpls_ifdown(dev, event);
++ if (err)
++ return notifier_from_errno(err);
+ break;
+ case NETDEV_UP:
+ flags = dev_get_flags(dev);
+@@ -1608,13 +1641,18 @@ static int mpls_dev_notify(struct notifi
+ break;
+ case NETDEV_CHANGE:
+ flags = dev_get_flags(dev);
+- if (flags & (IFF_RUNNING | IFF_LOWER_UP))
++ if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
+ mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
+- else
+- mpls_ifdown(dev, event);
++ } else {
++ err = mpls_ifdown(dev, event);
++ if (err)
++ return notifier_from_errno(err);
++ }
+ break;
+ case NETDEV_UNREGISTER:
+- mpls_ifdown(dev, event);
++ err = mpls_ifdown(dev, event);
++ if (err)
++ return notifier_from_errno(err);
+ mdev = mpls_dev_get(dev);
+ if (mdev) {
+ mpls_dev_sysctl_unregister(dev, mdev);
+@@ -1625,8 +1663,6 @@ static int mpls_dev_notify(struct notifi
+ case NETDEV_CHANGENAME:
+ mdev = mpls_dev_get(dev);
+ if (mdev) {
+- int err;
+-
+ mpls_dev_sysctl_unregister(dev, mdev);
+ err = mpls_dev_sysctl_register(dev, mdev);
+ if (err)
--- /dev/null
+From e2dabc4f7e7b60299c20a36d6a7b24ed9bf8e572 Mon Sep 17 00:00:00 2001
+From: Zhou Qingyang <zhou1615@umn.edu>
+Date: Tue, 30 Nov 2021 19:08:48 +0800
+Subject: net: qlogic: qlcnic: Fix a NULL pointer dereference in qlcnic_83xx_add_rings()
+
+From: Zhou Qingyang <zhou1615@umn.edu>
+
+commit e2dabc4f7e7b60299c20a36d6a7b24ed9bf8e572 upstream.
+
+In qlcnic_83xx_add_rings(), the indirect function
+ahw->hw_ops->alloc_mbx_args is called to allocate memory for
+cmd.req.arg, which is then dereferenced in the same function. A
+failure of the indirect function (e.g. qlcnic_83xx_alloc_mbx_args())
+therefore leads to a NULL pointer dereference.
+
+Fix this bug by checking the return value of alloc_mbx_args(); this
+patch imitates the failure handling of mbx_cmd().
+
+This bug was found by a static analyzer. The analysis employs
+differential checking to identify inconsistent security operations
+(e.g., checks or kfrees) between two code paths and confirms that the
+inconsistent operations are not recovered in the current function or
+the callers, so they constitute bugs.
+
+Note that, as a bug found by static analysis, it can be a false
+positive or hard to trigger. Multiple researchers have cross-reviewed
+the bug.
+
+Builds with CONFIG_QLCNIC=m show no new warnings, and our
+static analyzer no longer warns about this code.
+
+Fixes: 7f9664525f9c ("qlcnic: 83xx memory map and HW access routine")
+Signed-off-by: Zhou Qingyang <zhou1615@umn.edu>
+Link: https://lore.kernel.org/r/20211130110848.109026-1-zhou1615@umn.edu
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+@@ -1077,8 +1077,14 @@ static int qlcnic_83xx_add_rings(struct
+ sds_mbx_size = sizeof(struct qlcnic_sds_mbx);
+ context_id = recv_ctx->context_id;
+ num_sds = adapter->drv_sds_rings - QLCNIC_MAX_SDS_RINGS;
+- ahw->hw_ops->alloc_mbx_args(&cmd, adapter,
+- QLCNIC_CMD_ADD_RCV_RINGS);
++ err = ahw->hw_ops->alloc_mbx_args(&cmd, adapter,
++ QLCNIC_CMD_ADD_RCV_RINGS);
++ if (err) {
++ dev_err(&adapter->pdev->dev,
++ "Failed to alloc mbx args %d\n", err);
++ return err;
++ }
++
+ cmd.req.arg[1] = 0 | (num_sds << 8) | (context_id << 16);
+
+ /* set up status rings, mbx 2-81 */
drm-amdgpu-move-iommu_resume-before-ip-init-resume.patch
drm-amdgpu-init-iommu-after-amdkfd-device-init.patch
drm-amdkfd-fix-boot-failure-when-iommu-is-disabled-in-picasso.patch
+wireguard-selftests-increase-default-dmesg-log-size.patch
+wireguard-allowedips-add-missing-__rcu-annotation-to-satisfy-sparse.patch
+wireguard-selftests-actually-test-for-routing-loops.patch
+wireguard-selftests-rename-debug_pi_list-to-debug_plist.patch
+wireguard-device-reset-peer-src-endpoint-when-netns-exits.patch
+wireguard-receive-use-ring-buffer-for-incoming-handshakes.patch
+wireguard-receive-drop-handshakes-if-queue-lock-is-contended.patch
+wireguard-ratelimiter-use-kvcalloc-instead-of-kvzalloc.patch
+i2c-stm32f7-flush-tx-fifo-upon-transfer-errors.patch
+i2c-stm32f7-recover-the-bus-on-access-timeout.patch
+i2c-stm32f7-stop-dma-transfer-in-case-of-nack.patch
+i2c-cbus-gpio-set-atomic-transfer-callback.patch
+natsemi-xtensa-fix-section-mismatch-warnings.patch
+tcp-fix-page-frag-corruption-on-page-fault.patch
+net-qlogic-qlcnic-fix-a-null-pointer-dereference-in-qlcnic_83xx_add_rings.patch
+net-mpls-fix-notifications-when-deleting-a-device.patch
+siphash-use-_unaligned-version-by-default.patch
--- /dev/null
+From f7e5b9bfa6c8820407b64eabc1f29c9a87e8993d Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Mon, 29 Nov 2021 10:39:29 -0500
+Subject: siphash: use _unaligned version by default
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit f7e5b9bfa6c8820407b64eabc1f29c9a87e8993d upstream.
+
+On ARM v6 and later, we define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+because the ordinary load/store instructions (ldr, ldrh, ldrb) can
+tolerate any misalignment of the memory address. However, load/store
+double and load/store multiple instructions (ldrd, ldm) may still only
+be used on memory addresses that are 32-bit aligned, and so we have to
+use the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS macro with care, or we
+may end up with a severe performance hit due to alignment traps that
+require fixups by the kernel. Testing shows that this currently happens
+with clang-13 but not gcc-11. In theory, any compiler version can
+produce this bug or other problems, as we are dealing with undefined
+behavior in C99 even on architectures that support this in hardware,
+see also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363.
+
+Fortunately, the get_unaligned() accessors do the right thing: when
+building for ARMv6 or later, the compiler will emit unaligned accesses
+using the ordinary load/store instructions (but avoid the ones that
+require 32-bit alignment). When building for older ARM, those accessors
+will emit the appropriate sequence of ldrb/mov/orr instructions. And on
+architectures that can truly tolerate any kind of misalignment, the
+get_unaligned() accessors resolve to the leXX_to_cpup accessors that
+operate on aligned addresses.
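+
+For illustration, the accessor usage in question (a generic example,
+not taken from this patch):
+
+  #include <asm/unaligned.h>
+
+  u64 word = get_unaligned_le64(p);   /* safe for any alignment of p */
+
+On ARMv6+ this compiles to ordinary ldr instructions (never ldrd/ldm);
+on older ARM it expands to ldrb/orr sequences; on architectures that
+truly tolerate misalignment it is a single plain load.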
+
+Since the compiler will in fact emit ldrd or ldm instructions when
+building this code for ARM v6 or later, the solution is to use the
+unaligned accessors unconditionally on architectures where this is
+known to be fast. The _aligned version of the hash function is
+however still needed to get the best performance on architectures
+that cannot do any unaligned access in hardware.
+
+This new version avoids the undefined behavior and should produce
+the fastest hash on all architectures we support.
+
+Link: https://lore.kernel.org/linux-arm-kernel/20181008211554.5355-4-ard.biesheuvel@linaro.org/
+Link: https://lore.kernel.org/linux-crypto/CAK8P3a2KfmmGDbVHULWevB0hv71P2oi2ZCHEAqT=8dQfa0=cqQ@mail.gmail.com/
+Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Fixes: 2c956a60778c ("siphash: add cryptographically secure PRF")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/siphash.h | 14 ++++----------
+ lib/siphash.c | 12 ++++++------
+ 2 files changed, 10 insertions(+), 16 deletions(-)
+
+--- a/include/linux/siphash.h
++++ b/include/linux/siphash.h
+@@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(c
+ }
+
+ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
+-#endif
+
+ u64 siphash_1u64(const u64 a, const siphash_key_t *key);
+ u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
+@@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(con
+ static inline u64 siphash(const void *data, size_t len,
+ const siphash_key_t *key)
+ {
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+- if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
++ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
++ !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
+ return __siphash_unaligned(data, len, key);
+-#endif
+ return ___siphash_aligned(data, len, key);
+ }
+
+@@ -96,10 +93,8 @@ typedef struct {
+
+ u32 __hsiphash_aligned(const void *data, size_t len,
+ const hsiphash_key_t *key);
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key);
+-#endif
+
+ u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
+ u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
+@@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(co
+ static inline u32 hsiphash(const void *data, size_t len,
+ const hsiphash_key_t *key)
+ {
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+- if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
++ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
++ !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
+ return __hsiphash_unaligned(data, len, key);
+-#endif
+ return ___hsiphash_aligned(data, len, key);
+ }
+
+--- a/lib/siphash.c
++++ b/lib/siphash.c
+@@ -49,6 +49,7 @@
+ SIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
+ {
+ const u8 *end = data + len - (len % sizeof(u64));
+@@ -80,8 +81,8 @@ u64 __siphash_aligned(const void *data,
+ POSTAMBLE
+ }
+ EXPORT_SYMBOL(__siphash_aligned);
++#endif
+
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
+ {
+ const u8 *end = data + len - (len % sizeof(u64));
+@@ -113,7 +114,6 @@ u64 __siphash_unaligned(const void *data
+ POSTAMBLE
+ }
+ EXPORT_SYMBOL(__siphash_unaligned);
+-#endif
+
+ /**
+ * siphash_1u64 - compute 64-bit siphash PRF value of a u64
+@@ -250,6 +250,7 @@ EXPORT_SYMBOL(siphash_3u32);
+ HSIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+ {
+ const u8 *end = data + len - (len % sizeof(u64));
+@@ -280,8 +281,8 @@ u32 __hsiphash_aligned(const void *data,
+ HPOSTAMBLE
+ }
+ EXPORT_SYMBOL(__hsiphash_aligned);
++#endif
+
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key)
+ {
+@@ -313,7 +314,6 @@ u32 __hsiphash_unaligned(const void *dat
+ HPOSTAMBLE
+ }
+ EXPORT_SYMBOL(__hsiphash_unaligned);
+-#endif
+
+ /**
+ * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32
+@@ -418,6 +418,7 @@ EXPORT_SYMBOL(hsiphash_4u32);
+ HSIPROUND; \
+ return v1 ^ v3;
+
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+ {
+ const u8 *end = data + len - (len % sizeof(u32));
+@@ -438,8 +439,8 @@ u32 __hsiphash_aligned(const void *data,
+ HPOSTAMBLE
+ }
+ EXPORT_SYMBOL(__hsiphash_aligned);
++#endif
+
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key)
+ {
+@@ -461,7 +462,6 @@ u32 __hsiphash_unaligned(const void *dat
+ HPOSTAMBLE
+ }
+ EXPORT_SYMBOL(__hsiphash_unaligned);
+-#endif
+
+ /**
+ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
--- /dev/null
+From dacb5d8875cc6cd3a553363b4d6f06760fcbe70c Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 26 Nov 2021 19:34:21 +0100
+Subject: tcp: fix page frag corruption on page fault
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit dacb5d8875cc6cd3a553363b4d6f06760fcbe70c upstream.
+
+Steffen reported a TCP stream corruption for HTTP requests
+served by the apache web-server using a cifs mount-point
+and memory mapping the relevant file.
+
+The root cause is quite similar to the one addressed by
+commit 20eb4f29b602 ("net: fix sk_page_frag() recursion from
+memory reclaim"). Here the nested access to the task page frag
+is caused by a page fault on the (mmapped) user-space memory
+buffer coming from the cifs file.
+
+The page fault handler performs an smb transaction on a different
+socket, inside the same process context. Since sk->sk_allocation
+for such a socket does not prevent usage of the task_frag, the
+nested allocation modifies "under the hood" the page frag in use
+by the outer sendmsg call, corrupting the stream.
+
+The overall relevant stack trace looks like the following:
+
+httpd 78268 [001] 3461630.850950: probe:tcp_sendmsg_locked:
+ ffffffff91461d91 tcp_sendmsg_locked+0x1
+ ffffffff91462b57 tcp_sendmsg+0x27
+ ffffffff9139814e sock_sendmsg+0x3e
+ ffffffffc06dfe1d smb_send_kvec+0x28
+ [...]
+ ffffffffc06cfaf8 cifs_readpages+0x213
+ ffffffff90e83c4b read_pages+0x6b
+ ffffffff90e83f31 __do_page_cache_readahead+0x1c1
+ ffffffff90e79e98 filemap_fault+0x788
+ ffffffff90eb0458 __do_fault+0x38
+ ffffffff90eb5280 do_fault+0x1a0
+ ffffffff90eb7c84 __handle_mm_fault+0x4d4
+ ffffffff90eb8093 handle_mm_fault+0xc3
+ ffffffff90c74f6d __do_page_fault+0x1ed
+ ffffffff90c75277 do_page_fault+0x37
+ ffffffff9160111e page_fault+0x1e
+ ffffffff9109e7b5 copyin+0x25
+ ffffffff9109eb40 _copy_from_iter_full+0xe0
+ ffffffff91462370 tcp_sendmsg_locked+0x5e0
+ ffffffff91462370 tcp_sendmsg_locked+0x5e0
+ ffffffff91462b57 tcp_sendmsg+0x27
+ ffffffff9139815c sock_sendmsg+0x4c
+ ffffffff913981f7 sock_write_iter+0x97
+ ffffffff90f2cc56 do_iter_readv_writev+0x156
+ ffffffff90f2dff0 do_iter_write+0x80
+ ffffffff90f2e1c3 vfs_writev+0xa3
+ ffffffff90f2e27c do_writev+0x5c
+ ffffffff90c042bb do_syscall_64+0x5b
+ ffffffff916000ad entry_SYSCALL_64_after_hwframe+0x65
+
+The cifs filesystem rightfully sets sk_allocation to GFP_NOFS, so
+we can avoid the nesting by using the per-socket page frag for any
+allocation lacking the __GFP_FS flag. Do not define an additional
+mm-helper for that, as this is strictly tied to the sk page frag
+usage.
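+
+A sketch of how the new mask test classifies common sk_allocation
+values (illustrative helper; the patch open-codes the check):
+
+  static bool can_use_task_frag(gfp_t sk_allocation)
+  {
+          const gfp_t wanted = __GFP_DIRECT_RECLAIM | __GFP_FS;
+
+          return (sk_allocation & (wanted | __GFP_MEMALLOC)) == wanted;
+  }
+
+  /* GFP_KERNEL -> true  (task frag, the common fast path)
+   * GFP_NOFS   -> false (e.g. cifs: use the per-socket frag)
+   * GFP_ATOMIC -> false (no direct reclaim allowed)
+   */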
+
+v1 -> v2:
+ - use a stricter sk_page_frag() check instead of reordering the
+   code (Eric)
+
+Reported-by: Steffen Froemer <sfroemer@redhat.com>
+Fixes: 5640f7685831 ("net: use a per task frag allocator")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sock.h | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2322,19 +2322,22 @@ struct sk_buff *sk_stream_alloc_skb(stru
+ * @sk: socket
+ *
+ * Use the per task page_frag instead of the per socket one for
+- * optimization when we know that we're in the normal context and owns
++ * optimization when we know that we're in process context and own
+ * everything that's associated with %current.
+ *
+- * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
+- * inside other socket operations and end up recursing into sk_page_frag()
+- * while it's already in use.
++ * Both direct reclaim and page faults can nest inside other
++ * socket operations and end up recursing into sk_page_frag()
++ * while it's already in use: explicitly avoid task page_frag
++ * usage if the caller is potentially doing any of them.
++ * This assumes that page fault handlers use the GFP_NOFS flags.
+ *
+ * Return: a per task page_frag if context allows that,
+ * otherwise a per socket one.
+ */
+ static inline struct page_frag *sk_page_frag(struct sock *sk)
+ {
+- if (gfpflags_normal_context(sk->sk_allocation))
++ if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) ==
++ (__GFP_DIRECT_RECLAIM | __GFP_FS))
+ return ¤t->task_frag;
+
+ return &sk->sk_frag;
--- /dev/null
+From ae9287811ba75571cd69505d50ab0e612ace8572 Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Mon, 29 Nov 2021 10:39:20 -0500
+Subject: wireguard: allowedips: add missing __rcu annotation to satisfy sparse
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit ae9287811ba75571cd69505d50ab0e612ace8572 upstream.
+
+A __rcu annotation got lost during refactoring, which caused sparse to
+become enraged.
+
+Fixes: bf7b042dc62a ("wireguard: allowedips: free empty intermediate nodes when removing single node")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireguard/allowedips.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/wireguard/allowedips.c
++++ b/drivers/net/wireguard/allowedips.c
+@@ -163,7 +163,7 @@ static bool node_placement(struct allowe
+ return exact;
+ }
+
+-static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
++static inline void connect_node(struct allowedips_node __rcu **parent, u8 bit, struct allowedips_node *node)
+ {
+ node->parent_bit_packed = (unsigned long)parent | bit;
+ rcu_assign_pointer(*parent, node);
--- /dev/null
+From 20ae1d6aa159eb91a9bf09ff92ccaa94dbea92c2 Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Mon, 29 Nov 2021 10:39:25 -0500
+Subject: wireguard: device: reset peer src endpoint when netns exits
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit 20ae1d6aa159eb91a9bf09ff92ccaa94dbea92c2 upstream.
+
+Each peer's endpoint contains a dst_cache entry that takes a reference
+to another netdev. When the containing namespace exits, we take down the
+socket and prevent future sockets from being created (by setting
+creating_net to NULL), which removes that potential reference on the
+netns. However, it doesn't release references to the netns that a netdev
+cached in dst_cache might be taking, so the netns still might fail to
+exit. Since the socket is gimped anyway, we can simply clear all the
+dst_caches (by way of clearing the endpoint src), which will release all
+references.
+
+However, the current dst_cache_reset function only releases those
+references lazily. But it turns out that all of our usages of
+wg_socket_clear_peer_endpoint_src are called from contexts that are not
+exactly high-speed or bottle-necked. For example, when there's
+connection difficulty, or when userspace is reconfiguring the interface.
+And in particular for this patch, when the netns is exiting. So for
+those cases, it makes more sense to call dst_release immediately. For
+that, we add a small helper function to dst_cache.
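+
+For contrast, the existing lazy variant only bumps a timestamp, and
+each CPU drops its stale entry the next time it touches the cache
+(from include/net/dst_cache.h):
+
+  static inline void dst_cache_reset(struct dst_cache *dst_cache)
+  {
+          dst_cache->reset_ts = jiffies;
+  }
+
+The new dst_cache_reset_now() instead walks every per-cpu slot and
+calls dst_release() immediately, as shown in the patch below.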
+
+This patch also adds a test to netns.sh from Hangbin Liu to ensure this
+doesn't regress.
+
+Tested-by: Hangbin Liu <liuhangbin@gmail.com>
+Reported-by: Xiumei Mu <xmu@redhat.com>
+Cc: Toke Høiland-Jørgensen <toke@redhat.com>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Fixes: 900575aa33a3 ("wireguard: device: avoid circular netns references")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireguard/device.c | 3 +++
+ drivers/net/wireguard/socket.c | 2 +-
+ include/net/dst_cache.h | 11 +++++++++++
+ net/core/dst_cache.c | 19 +++++++++++++++++++
+ tools/testing/selftests/wireguard/netns.sh | 24 +++++++++++++++++++++++-
+ 5 files changed, 57 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/wireguard/device.c
++++ b/drivers/net/wireguard/device.c
+@@ -398,6 +398,7 @@ static struct rtnl_link_ops link_ops __r
+ static void wg_netns_pre_exit(struct net *net)
+ {
+ struct wg_device *wg;
++ struct wg_peer *peer;
+
+ rtnl_lock();
+ list_for_each_entry(wg, &device_list, device_list) {
+@@ -407,6 +408,8 @@ static void wg_netns_pre_exit(struct net
+ mutex_lock(&wg->device_update_lock);
+ rcu_assign_pointer(wg->creating_net, NULL);
+ wg_socket_reinit(wg, NULL, NULL);
++ list_for_each_entry(peer, &wg->peer_list, peer_list)
++ wg_socket_clear_peer_endpoint_src(peer);
+ mutex_unlock(&wg->device_update_lock);
+ }
+ }
+--- a/drivers/net/wireguard/socket.c
++++ b/drivers/net/wireguard/socket.c
+@@ -308,7 +308,7 @@ void wg_socket_clear_peer_endpoint_src(s
+ {
+ write_lock_bh(&peer->endpoint_lock);
+ memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
+- dst_cache_reset(&peer->endpoint_cache);
++ dst_cache_reset_now(&peer->endpoint_cache);
+ write_unlock_bh(&peer->endpoint_lock);
+ }
+
+--- a/include/net/dst_cache.h
++++ b/include/net/dst_cache.h
+@@ -80,6 +80,17 @@ static inline void dst_cache_reset(struc
+ }
+
+ /**
++ * dst_cache_reset_now - invalidate the cache contents immediately
++ * @dst_cache: the cache
++ *
++ * The caller must be sure there are no concurrent users, as this frees
++ * all dst_cache users immediately, rather than waiting for the next
++ * per-cpu usage like dst_cache_reset does. Most callers should use the
++ * higher speed lazily-freed dst_cache_reset function instead.
++ */
++void dst_cache_reset_now(struct dst_cache *dst_cache);
++
++/**
+ * dst_cache_init - initialize the cache, allocating the required storage
+ * @dst_cache: the cache
+ * @gfp: allocation flags
+--- a/net/core/dst_cache.c
++++ b/net/core/dst_cache.c
+@@ -162,3 +162,22 @@ void dst_cache_destroy(struct dst_cache
+ free_percpu(dst_cache->cache);
+ }
+ EXPORT_SYMBOL_GPL(dst_cache_destroy);
++
++void dst_cache_reset_now(struct dst_cache *dst_cache)
++{
++ int i;
++
++ if (!dst_cache->cache)
++ return;
++
++ dst_cache->reset_ts = jiffies;
++ for_each_possible_cpu(i) {
++ struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i);
++ struct dst_entry *dst = idst->dst;
++
++ idst->cookie = 0;
++ idst->dst = NULL;
++ dst_release(dst);
++ }
++}
++EXPORT_SYMBOL_GPL(dst_cache_reset_now);
+--- a/tools/testing/selftests/wireguard/netns.sh
++++ b/tools/testing/selftests/wireguard/netns.sh
+@@ -613,6 +613,28 @@ ip0 link set wg0 up
+ kill $ncat_pid
+ ip0 link del wg0
+
++# Ensure that dst_cache references don't outlive netns lifetime
++ip1 link add dev wg0 type wireguard
++ip2 link add dev wg0 type wireguard
++configure_peers
++ip1 link add veth1 type veth peer name veth2
++ip1 link set veth2 netns $netns2
++ip1 addr add fd00:aa::1/64 dev veth1
++ip2 addr add fd00:aa::2/64 dev veth2
++ip1 link set veth1 up
++ip2 link set veth2 up
++waitiface $netns1 veth1
++waitiface $netns2 veth2
++ip1 -6 route add default dev veth1 via fd00:aa::2
++ip2 -6 route add default dev veth2 via fd00:aa::1
++n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2
++n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1
++n1 ping6 -c 1 fd00::2
++pp ip netns delete $netns1
++pp ip netns delete $netns2
++pp ip netns add $netns1
++pp ip netns add $netns2
++
+ # Ensure there aren't circular reference loops
+ ip1 link add wg1 type wireguard
+ ip2 link add wg2 type wireguard
+@@ -631,7 +653,7 @@ while read -t 0.1 -r line 2>/dev/null ||
+ done < /dev/kmsg
+ alldeleted=1
+ for object in "${!objects[@]}"; do
+- if [[ ${objects["$object"]} != *createddestroyed ]]; then
++ if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then
+ echo "Error: $object: merely ${objects["$object"]}" >&3
+ alldeleted=0
+ fi
--- /dev/null
+From 4e3fd721710553832460c179c2ee5ce67ef7f1e0 Mon Sep 17 00:00:00 2001
+From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
+Date: Mon, 29 Nov 2021 10:39:28 -0500
+Subject: wireguard: ratelimiter: use kvcalloc() instead of kvzalloc()
+
+From: Gustavo A. R. Silva <gustavoars@kernel.org>
+
+commit 4e3fd721710553832460c179c2ee5ce67ef7f1e0 upstream.
+
+Use the 2-factor argument form kvcalloc() instead of kvzalloc().
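+
+The 2-factor form moves the multiplication inside the allocator, where
+it is overflow-checked:
+
+  /* Fails cleanly (returns NULL) if n * size would overflow,
+   * instead of silently wrapping and under-allocating: */
+  table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL);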
+
+Link: https://github.com/KSPP/linux/issues/162
+Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
+Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+[Jason: Gustavo's link above is for KSPP, but this isn't actually a
+ security fix, as table_size is bounded to 8192 anyway, and gcc realizes
+ this, so the codegen comes out to be about the same.]
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireguard/ratelimiter.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/wireguard/ratelimiter.c
++++ b/drivers/net/wireguard/ratelimiter.c
+@@ -176,12 +176,12 @@ int wg_ratelimiter_init(void)
+ (1U << 14) / sizeof(struct hlist_head)));
+ max_entries = table_size * 8;
+
+- table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
++ table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL);
+ if (unlikely(!table_v4))
+ goto err_kmemcache;
+
+ #if IS_ENABLED(CONFIG_IPV6)
+- table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
++ table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL);
+ if (unlikely(!table_v6)) {
+ kvfree(table_v4);
+ goto err_kmemcache;
--- /dev/null
+From fb32f4f606c17b869805d7cede8b03d78339b50a Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Mon, 29 Nov 2021 10:39:27 -0500
+Subject: wireguard: receive: drop handshakes if queue lock is contended
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit fb32f4f606c17b869805d7cede8b03d78339b50a upstream.
+
+If we're being delivered packets from multiple CPUs so quickly that
+the ring lock is contended across CPUs, then it's safe to assume that
+the queue is near capacity anyway, so just drop the packet rather
+than spinning. This helps deal with multicore DoS that can interfere
+with data path performance. It _still_ does not completely fix the
+issue, but it again chips away at it.
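+
+The enqueue path now does, in essence (condensed from the patch below,
+taking the ptr_ring producer lock directly):
+
+  int ret = -EBUSY;
+
+  if (spin_trylock_bh(&ring->producer_lock)) {
+          ret = __ptr_ring_produce(ring, skb);   /* 0 on success */
+          spin_unlock_bh(&ring->producer_lock);
+  }
+  /* lock contended or ring full: ret != 0, so the skb is dropped */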
+
+Reported-by: Streun Fabio <fstreun@student.ethz.ch>
+Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireguard/receive.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/wireguard/receive.c
++++ b/drivers/net/wireguard/receive.c
+@@ -554,9 +554,19 @@ void wg_packet_receive(struct wg_device
+ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
+- int cpu;
+- if (unlikely(!rng_is_initialized() ||
+- ptr_ring_produce_bh(&wg->handshake_queue.ring, skb))) {
++ int cpu, ret = -EBUSY;
++
++ if (unlikely(!rng_is_initialized()))
++ goto drop;
++ if (atomic_read(&wg->handshake_queue_len) > MAX_QUEUED_INCOMING_HANDSHAKES / 2) {
++ if (spin_trylock_bh(&wg->handshake_queue.ring.producer_lock)) {
++ ret = __ptr_ring_produce(&wg->handshake_queue.ring, skb);
++ spin_unlock_bh(&wg->handshake_queue.ring.producer_lock);
++ }
++ } else
++ ret = ptr_ring_produce_bh(&wg->handshake_queue.ring, skb);
++ if (ret) {
++ drop:
+ net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
+ wg->dev->name, skb);
+ goto err;
--- /dev/null
+From 886fcee939adb5e2af92741b90643a59f2b54f97 Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Mon, 29 Nov 2021 10:39:26 -0500
+Subject: wireguard: receive: use ring buffer for incoming handshakes
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit 886fcee939adb5e2af92741b90643a59f2b54f97 upstream.
+
+Apparently the spinlock on incoming_handshake's skb_queue is highly
+contended, and a torrent of handshake or cookie packets can bring the
+data plane to its knees, simply by virtue of enqueueing the handshake
+packets to be processed asynchronously. So, we try switching this to a
+ring buffer to hopefully have less lock contention. This alleviates the
+problem somewhat, though it still isn't perfect, so future patches will
+have to improve this further. However, it at least doesn't completely
+diminish the data plane.
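+
+The handshake queue reuses the existing crypt_queue machinery, which
+wraps a ptr_ring. The relevant ptr_ring primitives, for reference
+(include/linux/ptr_ring.h):
+
+  ptr_ring_init(&ring, len, GFP_KERNEL);    /* fixed-size ring */
+  ptr_ring_produce_bh(&ring, skb);          /* -ENOSPC when full */
+  skb = ptr_ring_consume_bh(&ring);         /* NULL when empty */
+  ptr_ring_cleanup(&ring, destroy_fn);      /* destroy_fn frees leftovers */
+
+Unlike an skb_queue, producers and consumers take separate locks, so a
+flood of handshake packets no longer serializes against the consumers.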
+
+Reported-by: Streun Fabio <fstreun@student.ethz.ch>
+Reported-by: Joel Wanner <joel.wanner@inf.ethz.ch>
+Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireguard/device.c | 36 ++++++++++++++++++------------------
+ drivers/net/wireguard/device.h | 9 +++------
+ drivers/net/wireguard/queueing.c | 6 +++---
+ drivers/net/wireguard/queueing.h | 2 +-
+ drivers/net/wireguard/receive.c | 27 ++++++++++++---------------
+ 5 files changed, 37 insertions(+), 43 deletions(-)
+
+--- a/drivers/net/wireguard/device.c
++++ b/drivers/net/wireguard/device.c
+@@ -98,6 +98,7 @@ static int wg_stop(struct net_device *de
+ {
+ struct wg_device *wg = netdev_priv(dev);
+ struct wg_peer *peer;
++ struct sk_buff *skb;
+
+ mutex_lock(&wg->device_update_lock);
+ list_for_each_entry(peer, &wg->peer_list, peer_list) {
+@@ -108,7 +109,9 @@ static int wg_stop(struct net_device *de
+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
+ }
+ mutex_unlock(&wg->device_update_lock);
+- skb_queue_purge(&wg->incoming_handshakes);
++ while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL)
++ kfree_skb(skb);
++ atomic_set(&wg->handshake_queue_len, 0);
+ wg_socket_reinit(wg, NULL, NULL);
+ return 0;
+ }
+@@ -235,14 +238,13 @@ static void wg_destruct(struct net_devic
+ destroy_workqueue(wg->handshake_receive_wq);
+ destroy_workqueue(wg->handshake_send_wq);
+ destroy_workqueue(wg->packet_crypt_wq);
+- wg_packet_queue_free(&wg->decrypt_queue);
+- wg_packet_queue_free(&wg->encrypt_queue);
++ wg_packet_queue_free(&wg->handshake_queue, true);
++ wg_packet_queue_free(&wg->decrypt_queue, false);
++ wg_packet_queue_free(&wg->encrypt_queue, false);
+ rcu_barrier(); /* Wait for all the peers to be actually freed. */
+ wg_ratelimiter_uninit();
+ memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
+- skb_queue_purge(&wg->incoming_handshakes);
+ free_percpu(dev->tstats);
+- free_percpu(wg->incoming_handshakes_worker);
+ kvfree(wg->index_hashtable);
+ kvfree(wg->peer_hashtable);
+ mutex_unlock(&wg->device_update_lock);
+@@ -298,7 +300,6 @@ static int wg_newlink(struct net *src_ne
+ init_rwsem(&wg->static_identity.lock);
+ mutex_init(&wg->socket_update_lock);
+ mutex_init(&wg->device_update_lock);
+- skb_queue_head_init(&wg->incoming_handshakes);
+ wg_allowedips_init(&wg->peer_allowedips);
+ wg_cookie_checker_init(&wg->cookie_checker, wg);
+ INIT_LIST_HEAD(&wg->peer_list);
+@@ -316,16 +317,10 @@ static int wg_newlink(struct net *src_ne
+ if (!dev->tstats)
+ goto err_free_index_hashtable;
+
+- wg->incoming_handshakes_worker =
+- wg_packet_percpu_multicore_worker_alloc(
+- wg_packet_handshake_receive_worker, wg);
+- if (!wg->incoming_handshakes_worker)
+- goto err_free_tstats;
+-
+ wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
+ WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
+ if (!wg->handshake_receive_wq)
+- goto err_free_incoming_handshakes;
++ goto err_free_tstats;
+
+ wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
+ WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
+@@ -347,10 +342,15 @@ static int wg_newlink(struct net *src_ne
+ if (ret < 0)
+ goto err_free_encrypt_queue;
+
+- ret = wg_ratelimiter_init();
++ ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker,
++ MAX_QUEUED_INCOMING_HANDSHAKES);
+ if (ret < 0)
+ goto err_free_decrypt_queue;
+
++ ret = wg_ratelimiter_init();
++ if (ret < 0)
++ goto err_free_handshake_queue;
++
+ ret = register_netdevice(dev);
+ if (ret < 0)
+ goto err_uninit_ratelimiter;
+@@ -367,18 +367,18 @@ static int wg_newlink(struct net *src_ne
+
+ err_uninit_ratelimiter:
+ wg_ratelimiter_uninit();
++err_free_handshake_queue:
++ wg_packet_queue_free(&wg->handshake_queue, false);
+ err_free_decrypt_queue:
+- wg_packet_queue_free(&wg->decrypt_queue);
++ wg_packet_queue_free(&wg->decrypt_queue, false);
+ err_free_encrypt_queue:
+- wg_packet_queue_free(&wg->encrypt_queue);
++ wg_packet_queue_free(&wg->encrypt_queue, false);
+ err_destroy_packet_crypt:
+ destroy_workqueue(wg->packet_crypt_wq);
+ err_destroy_handshake_send:
+ destroy_workqueue(wg->handshake_send_wq);
+ err_destroy_handshake_receive:
+ destroy_workqueue(wg->handshake_receive_wq);
+-err_free_incoming_handshakes:
+- free_percpu(wg->incoming_handshakes_worker);
+ err_free_tstats:
+ free_percpu(dev->tstats);
+ err_free_index_hashtable:
+--- a/drivers/net/wireguard/device.h
++++ b/drivers/net/wireguard/device.h
+@@ -39,21 +39,18 @@ struct prev_queue {
+
+ struct wg_device {
+ struct net_device *dev;
+- struct crypt_queue encrypt_queue, decrypt_queue;
++ struct crypt_queue encrypt_queue, decrypt_queue, handshake_queue;
+ struct sock __rcu *sock4, *sock6;
+ struct net __rcu *creating_net;
+ struct noise_static_identity static_identity;
+- struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
+- struct workqueue_struct *packet_crypt_wq;
+- struct sk_buff_head incoming_handshakes;
+- int incoming_handshake_cpu;
+- struct multicore_worker __percpu *incoming_handshakes_worker;
++ struct workqueue_struct *packet_crypt_wq, *handshake_receive_wq, *handshake_send_wq;
+ struct cookie_checker cookie_checker;
+ struct pubkey_hashtable *peer_hashtable;
+ struct index_hashtable *index_hashtable;
+ struct allowedips peer_allowedips;
+ struct mutex device_update_lock, socket_update_lock;
+ struct list_head device_list, peer_list;
++ atomic_t handshake_queue_len;
+ unsigned int num_peers, device_update_gen;
+ u32 fwmark;
+ u16 incoming_port;
+--- a/drivers/net/wireguard/queueing.c
++++ b/drivers/net/wireguard/queueing.c
+@@ -38,11 +38,11 @@ int wg_packet_queue_init(struct crypt_qu
+ return 0;
+ }
+
+-void wg_packet_queue_free(struct crypt_queue *queue)
++void wg_packet_queue_free(struct crypt_queue *queue, bool purge)
+ {
+ free_percpu(queue->worker);
+- WARN_ON(!__ptr_ring_empty(&queue->ring));
+- ptr_ring_cleanup(&queue->ring, NULL);
++ WARN_ON(!purge && !__ptr_ring_empty(&queue->ring));
++ ptr_ring_cleanup(&queue->ring, purge ? (void(*)(void*))kfree_skb : NULL);
+ }
+
+ #define NEXT(skb) ((skb)->prev)
+--- a/drivers/net/wireguard/queueing.h
++++ b/drivers/net/wireguard/queueing.h
+@@ -23,7 +23,7 @@ struct sk_buff;
+ /* queueing.c APIs: */
+ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
+ unsigned int len);
+-void wg_packet_queue_free(struct crypt_queue *queue);
++void wg_packet_queue_free(struct crypt_queue *queue, bool purge);
+ struct multicore_worker __percpu *
+ wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
+
+--- a/drivers/net/wireguard/receive.c
++++ b/drivers/net/wireguard/receive.c
+@@ -116,8 +116,8 @@ static void wg_receive_handshake_packet(
+ return;
+ }
+
+- under_load = skb_queue_len(&wg->incoming_handshakes) >=
+- MAX_QUEUED_INCOMING_HANDSHAKES / 8;
++ under_load = atomic_read(&wg->handshake_queue_len) >=
++ MAX_QUEUED_INCOMING_HANDSHAKES / 8;
+ if (under_load) {
+ last_under_load = ktime_get_coarse_boottime_ns();
+ } else if (last_under_load) {
+@@ -212,13 +212,14 @@ static void wg_receive_handshake_packet(
+
+ void wg_packet_handshake_receive_worker(struct work_struct *work)
+ {
+- struct wg_device *wg = container_of(work, struct multicore_worker,
+- work)->ptr;
++ struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
++ struct wg_device *wg = container_of(queue, struct wg_device, handshake_queue);
+ struct sk_buff *skb;
+
+- while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
++ while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
+ wg_receive_handshake_packet(wg, skb);
+ dev_kfree_skb(skb);
++ atomic_dec(&wg->handshake_queue_len);
+ cond_resched();
+ }
+ }
+@@ -554,21 +555,17 @@ void wg_packet_receive(struct wg_device
+ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
+ int cpu;
+-
+- if (skb_queue_len(&wg->incoming_handshakes) >
+- MAX_QUEUED_INCOMING_HANDSHAKES ||
+- unlikely(!rng_is_initialized())) {
++ if (unlikely(!rng_is_initialized() ||
++ ptr_ring_produce_bh(&wg->handshake_queue.ring, skb))) {
+ net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
+ wg->dev->name, skb);
+ goto err;
+ }
+- skb_queue_tail(&wg->incoming_handshakes, skb);
+- /* Queues up a call to packet_process_queued_handshake_
+- * packets(skb):
+- */
+- cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
++ atomic_inc(&wg->handshake_queue_len);
++ cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu);
++ /* Queues up a call to packet_process_queued_handshake_packets(skb): */
+ queue_work_on(cpu, wg->handshake_receive_wq,
+- &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
++ &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work);
+ break;
+ }
+ case cpu_to_le32(MESSAGE_DATA):
--- /dev/null
+From 782c72af567fc2ef09bd7615d0307f24de72c7e0 Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Mon, 29 Nov 2021 10:39:22 -0500
+Subject: wireguard: selftests: actually test for routing loops
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit 782c72af567fc2ef09bd7615d0307f24de72c7e0 upstream.
+
+We previously removed the restriction on looping to self, and then added
+a test to make sure the kernel didn't blow up during a routing loop. The
+kernel didn't blow up, thankfully, but on certain architectures where
+skb fragmentation is easier, such as ppc64, the skbs weren't actually
+being discarded after a few rounds through. But the test wasn't catching
+this. So actually test explicitly for massive increases in tx to see if
+we have a routing loop. Note that the actual loop problem will need to
+be addressed in a different commit.
+
+Fixes: b673e24aad36 ("wireguard: socket: remove errant restriction on looping to self")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/wireguard/netns.sh | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/wireguard/netns.sh
++++ b/tools/testing/selftests/wireguard/netns.sh
+@@ -276,7 +276,11 @@ n0 ping -W 1 -c 1 192.168.241.2
+ n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
+ ip2 link del wg0
+ ip2 link del wg1
+-! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
++read _ _ tx_bytes_before < <(n0 wg show wg1 transfer)
++! n0 ping -W 1 -c 10 -f 192.168.241.2 || false
++sleep 1
++read _ _ tx_bytes_after < <(n0 wg show wg1 transfer)
++(( tx_bytes_after - tx_bytes_before < 70000 ))
+
+ ip0 link del wg1
+ ip1 link del wg0
--- /dev/null
+From 03ff1b1def73f817e196bf96ab36ac259490bd7c Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Mon, 29 Nov 2021 10:39:21 -0500
+Subject: wireguard: selftests: increase default dmesg log size
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit 03ff1b1def73f817e196bf96ab36ac259490bd7c upstream.
+
+The selftests currently parse the kernel log at the end to track
+potential memory leaks. With these tests now reading off the end of the
+buffer, due to recent optimizations, some creation messages were lost,
+making the tests think that there was a free without an alloc. Fix this
+by increasing the kernel log size.
+
+Fixes: 24b70eeeb4f4 ("wireguard: use synchronize_net rather than synchronize_rcu")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/wireguard/qemu/kernel.config | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
+index 74db83a0aedd..a9b5a520a1d2 100644
+--- a/tools/testing/selftests/wireguard/qemu/kernel.config
++++ b/tools/testing/selftests/wireguard/qemu/kernel.config
+@@ -66,6 +66,7 @@ CONFIG_PROC_SYSCTL=y
+ CONFIG_SYSFS=y
+ CONFIG_TMPFS=y
+ CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
++CONFIG_LOG_BUF_SHIFT=18
+ CONFIG_PRINTK_TIME=y
+ CONFIG_BLK_DEV_INITRD=y
+ CONFIG_LEGACY_VSYSCALL_NONE=y
+--
+2.34.1
+
--- /dev/null
+From 7e938beb8321d34f040557b8915b228af125f73c Mon Sep 17 00:00:00 2001
+From: Li Zhijian <lizhijian@cn.fujitsu.com>
+Date: Mon, 29 Nov 2021 10:39:24 -0500
+Subject: wireguard: selftests: rename DEBUG_PI_LIST to DEBUG_PLIST
+
+From: Li Zhijian <lizhijian@cn.fujitsu.com>
+
+commit 7e938beb8321d34f040557b8915b228af125f73c upstream.
+
+DEBUG_PI_LIST was renamed to DEBUG_PLIST by commit 8e18faeac3e4
+("lib/plist: rename DEBUG_PI_LIST to DEBUG_PLIST").
+
+Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
+Fixes: 8e18faeac3e4 ("lib/plist: rename DEBUG_PI_LIST to DEBUG_PLIST")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/wireguard/qemu/debug.config | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/wireguard/qemu/debug.config
++++ b/tools/testing/selftests/wireguard/qemu/debug.config
+@@ -48,7 +48,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
+ CONFIG_TRACE_IRQFLAGS=y
+ CONFIG_DEBUG_BUGVERBOSE=y
+ CONFIG_DEBUG_LIST=y
+-CONFIG_DEBUG_PI_LIST=y
++CONFIG_DEBUG_PLIST=y
+ CONFIG_PROVE_RCU=y
+ CONFIG_SPARSE_RCU_POINTER=y
+ CONFIG_RCU_CPU_STALL_TIMEOUT=21