--- /dev/null
+From a826b04303a40d52439aa141035fca5654ccaccd Mon Sep 17 00:00:00 2001
+From: Matteo Croce <mcroce@microsoft.com>
+Date: Fri, 15 Jan 2021 19:42:08 +0100
+Subject: ipv6: create multicast route with RTPROT_KERNEL
+
+From: Matteo Croce <mcroce@microsoft.com>
+
+commit a826b04303a40d52439aa141035fca5654ccaccd upstream.
+
+The ff00::/8 multicast route is created without specifying the fc_protocol
+field, so the default RTPROT_BOOT value is used:
+
+ $ ip -6 -d route
+ unicast ::1 dev lo proto kernel scope global metric 256 pref medium
+ unicast fe80::/64 dev eth0 proto kernel scope global metric 256 pref medium
+ unicast ff00::/8 dev eth0 proto boot scope global metric 256 pref medium
+
+As the documentation says, this value identifies routes installed during
+boot, but the route is created when the interface is set up.
+Change the value to RTPROT_KERNEL which is a better value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Matteo Croce <mcroce@microsoft.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ipv6/addrconf.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -2468,6 +2468,7 @@ static void addrconf_add_mroute(struct n
+ .fc_flags = RTF_UP,
+ .fc_type = RTN_UNICAST,
+ .fc_nlinfo.nl_net = dev_net(dev),
++ .fc_protocol = RTPROT_KERNEL,
+ };
+
+ ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
--- /dev/null
+From ceed9038b2783d14e0422bdc6fd04f70580efb4c Mon Sep 17 00:00:00 2001
+From: Matteo Croce <mcroce@microsoft.com>
+Date: Fri, 15 Jan 2021 19:42:09 +0100
+Subject: ipv6: set multicast flag on the multicast route
+
+From: Matteo Croce <mcroce@microsoft.com>
+
+commit ceed9038b2783d14e0422bdc6fd04f70580efb4c upstream.
+
+The multicast route ff00::/8 is created with type RTN_UNICAST:
+
+ $ ip -6 -d route
+ unicast ::1 dev lo proto kernel scope global metric 256 pref medium
+ unicast fe80::/64 dev eth0 proto kernel scope global metric 256 pref medium
+ unicast ff00::/8 dev eth0 proto kernel scope global metric 256 pref medium
+
+Set the type to RTN_MULTICAST which is more appropriate.
+
+Fixes: e8478e80e5a7 ("net/ipv6: Save route type in rt6_info")
+Signed-off-by: Matteo Croce <mcroce@microsoft.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ipv6/addrconf.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -2466,7 +2466,7 @@ static void addrconf_add_mroute(struct n
+ .fc_ifindex = dev->ifindex,
+ .fc_dst_len = 8,
+ .fc_flags = RTF_UP,
+- .fc_type = RTN_UNICAST,
++ .fc_type = RTN_MULTICAST,
+ .fc_nlinfo.nl_net = dev_net(dev),
+ .fc_protocol = RTPROT_KERNEL,
+ };
--- /dev/null
+From 5dabd1712cd056814f9ab15f1d68157ceb04e741 Mon Sep 17 00:00:00 2001
+From: Lecopzer Chen <lecopzer@gmail.com>
+Date: Sat, 23 Jan 2021 21:01:29 -0800
+Subject: kasan: fix incorrect arguments passing in kasan_add_zero_shadow
+
+From: Lecopzer Chen <lecopzer@gmail.com>
+
+commit 5dabd1712cd056814f9ab15f1d68157ceb04e741 upstream.
+
+kasan_remove_zero_shadow() shall use original virtual address, start and
+size, instead of shadow address.
+
+Link: https://lkml.kernel.org/r/20210103063847.5963-1-lecopzer@gmail.com
+Fixes: 0207df4fa1a86 ("kernel/memremap, kasan: make ZONE_DEVICE with work with KASAN")
+Signed-off-by: Lecopzer Chen <lecopzer.chen@mediatek.com>
+Reviewed-by: Andrey Konovalov <andreyknvl@google.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Alexander Potapenko <glider@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/kasan/init.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/mm/kasan/init.c
++++ b/mm/kasan/init.c
+@@ -492,7 +492,6 @@ int kasan_add_zero_shadow(void *start, u
+
+ ret = kasan_populate_early_shadow(shadow_start, shadow_end);
+ if (ret)
+- kasan_remove_zero_shadow(shadow_start,
+- size >> KASAN_SHADOW_SCALE_SHIFT);
++ kasan_remove_zero_shadow(start, size);
+ return ret;
+ }
--- /dev/null
+From a11a496ee6e2ab6ed850233c96b94caf042af0b9 Mon Sep 17 00:00:00 2001
+From: Lecopzer Chen <lecopzer@gmail.com>
+Date: Sat, 23 Jan 2021 21:01:25 -0800
+Subject: kasan: fix unaligned address is unhandled in kasan_remove_zero_shadow
+
+From: Lecopzer Chen <lecopzer@gmail.com>
+
+commit a11a496ee6e2ab6ed850233c96b94caf042af0b9 upstream.
+
+During testing kasan_populate_early_shadow and kasan_remove_zero_shadow,
+if the shadow start and end address in kasan_remove_zero_shadow() is not
+aligned to PMD_SIZE, the remaining unaligned PTEs won't be removed.
+
+In the test case for kasan_remove_zero_shadow():
+
+ shadow_start: 0xffffffb802000000, shadow end: 0xffffffbfbe000000
+
+ 3-level page table:
+ PUD_SIZE: 0x40000000 PMD_SIZE: 0x200000 PAGE_SIZE: 4K
+
+0xffffffbf80000000 ~ 0xffffffbfbdf80000 will not be removed because in
+kasan_remove_pud_table(), kasan_pmd_table(*pud) is true but the next
+address is 0xffffffbfbdf80000 which is not aligned to PUD_SIZE.
+
+In the correct condition, this should fallback to the next level
+kasan_remove_pmd_table() but the condition flow always continue to skip
+the unaligned part.
+
+Fix by correcting the condition for the case when neither next nor addr is aligned.
+
+Link: https://lkml.kernel.org/r/20210103135621.83129-1-lecopzer@gmail.com
+Fixes: 0207df4fa1a86 ("kernel/memremap, kasan: make ZONE_DEVICE with work with KASAN")
+Signed-off-by: Lecopzer Chen <lecopzer.chen@mediatek.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: YJ Chiang <yj.chiang@mediatek.com>
+Cc: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/kasan/init.c | 20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+--- a/mm/kasan/init.c
++++ b/mm/kasan/init.c
+@@ -377,9 +377,10 @@ static void kasan_remove_pmd_table(pmd_t
+
+ if (kasan_pte_table(*pmd)) {
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+- IS_ALIGNED(next, PMD_SIZE))
++ IS_ALIGNED(next, PMD_SIZE)) {
+ pmd_clear(pmd);
+- continue;
++ continue;
++ }
+ }
+ pte = pte_offset_kernel(pmd, addr);
+ kasan_remove_pte_table(pte, addr, next);
+@@ -402,9 +403,10 @@ static void kasan_remove_pud_table(pud_t
+
+ if (kasan_pmd_table(*pud)) {
+ if (IS_ALIGNED(addr, PUD_SIZE) &&
+- IS_ALIGNED(next, PUD_SIZE))
++ IS_ALIGNED(next, PUD_SIZE)) {
+ pud_clear(pud);
+- continue;
++ continue;
++ }
+ }
+ pmd = pmd_offset(pud, addr);
+ pmd_base = pmd_offset(pud, 0);
+@@ -428,9 +430,10 @@ static void kasan_remove_p4d_table(p4d_t
+
+ if (kasan_pud_table(*p4d)) {
+ if (IS_ALIGNED(addr, P4D_SIZE) &&
+- IS_ALIGNED(next, P4D_SIZE))
++ IS_ALIGNED(next, P4D_SIZE)) {
+ p4d_clear(p4d);
+- continue;
++ continue;
++ }
+ }
+ pud = pud_offset(p4d, addr);
+ kasan_remove_pud_table(pud, addr, next);
+@@ -462,9 +465,10 @@ void kasan_remove_zero_shadow(void *star
+
+ if (kasan_p4d_table(*pgd)) {
+ if (IS_ALIGNED(addr, PGDIR_SIZE) &&
+- IS_ALIGNED(next, PGDIR_SIZE))
++ IS_ALIGNED(next, PGDIR_SIZE)) {
+ pgd_clear(pgd);
+- continue;
++ continue;
++ }
+ }
+
+ p4d = p4d_offset(pgd, addr);
--- /dev/null
+From 7e238de8283acd32c26c2bc2a50672d0ea862ff7 Mon Sep 17 00:00:00 2001
+From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
+Date: Tue, 19 Jan 2021 10:53:33 +0200
+Subject: net: core: devlink: use right genl user_ptr when handling port param get/set
+
+From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
+
+commit 7e238de8283acd32c26c2bc2a50672d0ea862ff7 upstream.
+
+Fix incorrect user_ptr dereferencing when handling port param get/set:
+
+ idx [0] stores the 'struct devlink' pointer;
+ idx [1] stores the 'struct devlink_port' pointer;
+
+Fixes: 637989b5d77e ("devlink: Always use user_ptr[0] for devlink and simplify post_doit")
+CC: Parav Pandit <parav@mellanox.com>
+Signed-off-by: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
+Signed-off-by: Vadym Kochan <vadym.kochan@plvision.eu>
+Link: https://lore.kernel.org/r/20210119085333.16833-1-vadym.kochan@plvision.eu
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/devlink.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/core/devlink.c
++++ b/net/core/devlink.c
+@@ -4134,7 +4134,7 @@ out:
+ static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+ {
+- struct devlink_port *devlink_port = info->user_ptr[0];
++ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink_param_item *param_item;
+ struct sk_buff *msg;
+ int err;
+@@ -4163,7 +4163,7 @@ static int devlink_nl_cmd_port_param_get
+ static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+ {
+- struct devlink_port *devlink_port = info->user_ptr[0];
++ struct devlink_port *devlink_port = info->user_ptr[1];
+
+ return __devlink_nl_cmd_param_set_doit(devlink_port->devlink,
+ devlink_port->index,
--- /dev/null
+From a3eb4e9d4c9218476d05c52dfd2be3d6fdce6b91 Mon Sep 17 00:00:00 2001
+From: Tariq Toukan <tariqt@nvidia.com>
+Date: Sun, 17 Jan 2021 17:15:38 +0200
+Subject: net: Disable NETIF_F_HW_TLS_RX when RXCSUM is disabled
+
+From: Tariq Toukan <tariqt@nvidia.com>
+
+commit a3eb4e9d4c9218476d05c52dfd2be3d6fdce6b91 upstream.
+
+With NETIF_F_HW_TLS_RX packets are decrypted in HW. This cannot be
+logically done when RXCSUM offload is off.
+
+Fixes: 14136564c8ee ("net: Add TLS RX offload feature")
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Boris Pismenny <borisp@nvidia.com>
+Link: https://lore.kernel.org/r/20210117151538.9411-1-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/dev.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9602,6 +9602,11 @@ static netdev_features_t netdev_fix_feat
+ }
+ }
+
++ if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) {
++ netdev_dbg(dev, "Dropping TLS RX HW offload feature since no RXCSUM feature.\n");
++ features &= ~NETIF_F_HW_TLS_RX;
++ }
++
+ return features;
+ }
+
--- /dev/null
+From 8e4052c32d6b4b39c1e13c652c7e33748d447409 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Tue, 19 Jan 2021 17:48:03 +0300
+Subject: net: dsa: b53: fix an off by one in checking "vlan->vid"
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit 8e4052c32d6b4b39c1e13c652c7e33748d447409 upstream.
+
+The > comparison should be >= to prevent accessing one element beyond
+the end of the dev->vlans[] array in the caller function, b53_vlan_add().
+The "dev->vlans" array is allocated in the b53_switch_init() function
+and it has "dev->num_vlans" elements.
+
+Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Link: https://lore.kernel.org/r/YAbxI97Dl/pmBy5V@mwanda
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/dsa/b53/b53_common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -1404,7 +1404,7 @@ int b53_vlan_prepare(struct dsa_switch *
+ !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED))
+ return -EINVAL;
+
+- if (vlan->vid_end > dev->num_vlans)
++ if (vlan->vid_end >= dev->num_vlans)
+ return -ERANGE;
+
+ b53_enable_vlan(dev, true, ds->vlan_filtering);
--- /dev/null
+From 79267ae22615496655feee2db0848f6786bcf67a Mon Sep 17 00:00:00 2001
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+Date: Mon, 18 Jan 2021 15:52:10 +0200
+Subject: net: mscc: ocelot: allow offloading of bridge on top of LAG
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+commit 79267ae22615496655feee2db0848f6786bcf67a upstream.
+
+The blamed commit was too aggressive, and it made ocelot_netdevice_event
+react only to network interface events emitted for the ocelot switch
+ports.
+
+In fact, only the PRECHANGEUPPER should have had that check.
+
+When we ignore all events that are not for us, we miss the fact that the
+upper of the LAG changes, and the bonding interface gets enslaved to a
+bridge. This is an operation we could offload under certain conditions.
+
+Fixes: 7afb3e575e5a ("net: mscc: ocelot: don't handle netdev events for other netdevs")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20210118135210.2666246-1-olteanv@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/mscc/ocelot_net.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mscc/ocelot_net.c
++++ b/drivers/net/ethernet/mscc/ocelot_net.c
+@@ -952,10 +952,8 @@ static int ocelot_netdevice_event(struct
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ int ret = 0;
+
+- if (!ocelot_netdevice_dev_check(dev))
+- return 0;
+-
+ if (event == NETDEV_PRECHANGEUPPER &&
++ ocelot_netdevice_dev_check(dev) &&
+ netif_is_lag_master(info->upper_dev)) {
+ struct netdev_lag_upper_info *lag_upper_info = info->upper_info;
+ struct netlink_ext_ack *extack;
--- /dev/null
+From 584b7cfcdc7d6d416a9d6fece9516764bd977d2e Mon Sep 17 00:00:00 2001
+From: Alban Bedel <alban.bedel@aerq.com>
+Date: Tue, 19 Jan 2021 15:06:38 +0100
+Subject: net: mscc: ocelot: Fix multicast to the CPU port
+
+From: Alban Bedel <alban.bedel@aerq.com>
+
+commit 584b7cfcdc7d6d416a9d6fece9516764bd977d2e upstream.
+
+Multicast entries in the MAC table use the high bits of the MAC
+address to encode the ports that should get the packets. But this port
+mask does not work for the CPU port, to receive these packets on the
+CPU port the MAC_CPU_COPY flag must be set.
+
+Because of this IPv6 was effectively not working because neighbor
+solicitations were never received. This was not apparent before commit
+9403c158 (net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb
+entries) as the IPv6 entries were broken so all incoming IPv6
+multicast was then treated as unknown and flooded on all ports.
+
+To fix this problem rework ocelot_mact_learn() to set the
+MAC_CPU_COPY flag when a multicast entry that targets the CPU port is
+added. For this we have to read back the ports encoded in the pseudo
+MAC address by the caller. It is not a very nice design but it avoids
+changing the callers and should make backporting easier.
+
+Signed-off-by: Alban Bedel <alban.bedel@aerq.com>
+Fixes: 9403c158b872 ("net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries")
+Link: https://lore.kernel.org/r/20210119140638.203374-1-alban.bedel@aerq.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/mscc/ocelot.c | 23 ++++++++++++++++++-----
+ 1 file changed, 18 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/mscc/ocelot.c
++++ b/drivers/net/ethernet/mscc/ocelot.c
+@@ -60,14 +60,27 @@ int ocelot_mact_learn(struct ocelot *oce
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid, enum macaccess_entry_type type)
+ {
++ u32 cmd = ANA_TABLES_MACACCESS_VALID |
++ ANA_TABLES_MACACCESS_DEST_IDX(port) |
++ ANA_TABLES_MACACCESS_ENTRYTYPE(type) |
++ ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN);
++ unsigned int mc_ports;
++
++ /* Set MAC_CPU_COPY if the CPU port is used by a multicast entry */
++ if (type == ENTRYTYPE_MACv4)
++ mc_ports = (mac[1] << 8) | mac[2];
++ else if (type == ENTRYTYPE_MACv6)
++ mc_ports = (mac[0] << 8) | mac[1];
++ else
++ mc_ports = 0;
++
++ if (mc_ports & BIT(ocelot->num_phys_ports))
++ cmd |= ANA_TABLES_MACACCESS_MAC_CPU_COPY;
++
+ ocelot_mact_select(ocelot, mac, vid);
+
+ /* Issue a write command */
+- ocelot_write(ocelot, ANA_TABLES_MACACCESS_VALID |
+- ANA_TABLES_MACACCESS_DEST_IDX(port) |
+- ANA_TABLES_MACACCESS_ENTRYTYPE(type) |
+- ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN),
+- ANA_TABLES_MACACCESS);
++ ocelot_write(ocelot, cmd, ANA_TABLES_MACACCESS);
+
+ return ocelot_mact_wait_for_completion(ocelot);
+ }
--- /dev/null
+From bcd0cf19ef8258ac31b9a20248b05c15a1f4b4b0 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 14 Jan 2021 10:52:29 -0800
+Subject: net_sched: avoid shift-out-of-bounds in tcindex_set_parms()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit bcd0cf19ef8258ac31b9a20248b05c15a1f4b4b0 upstream.
+
+tc_index being 16bit wide, we need to check that TCA_TCINDEX_SHIFT
+attribute is not silly.
+
+UBSAN: shift-out-of-bounds in net/sched/cls_tcindex.c:260:29
+shift exponent 255 is too large for 32-bit type 'int'
+CPU: 0 PID: 8516 Comm: syz-executor228 Not tainted 5.10.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:79 [inline]
+ dump_stack+0x107/0x163 lib/dump_stack.c:120
+ ubsan_epilogue+0xb/0x5a lib/ubsan.c:148
+ __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395
+ valid_perfect_hash net/sched/cls_tcindex.c:260 [inline]
+ tcindex_set_parms.cold+0x1b/0x215 net/sched/cls_tcindex.c:425
+ tcindex_change+0x232/0x340 net/sched/cls_tcindex.c:546
+ tc_new_tfilter+0x13fb/0x21b0 net/sched/cls_api.c:2127
+ rtnetlink_rcv_msg+0x8b6/0xb80 net/core/rtnetlink.c:5555
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline]
+ netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1330
+ netlink_sendmsg+0x907/0xe40 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:652 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:672
+ ____sys_sendmsg+0x6e8/0x810 net/socket.c:2336
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2390
+ __sys_sendmsg+0xe5/0x1b0 net/socket.c:2423
+ do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20210114185229.1742255-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sched/cls_tcindex.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/sched/cls_tcindex.c
++++ b/net/sched/cls_tcindex.c
+@@ -366,9 +366,13 @@ tcindex_set_parms(struct net *net, struc
+ if (tb[TCA_TCINDEX_MASK])
+ cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+
+- if (tb[TCA_TCINDEX_SHIFT])
++ if (tb[TCA_TCINDEX_SHIFT]) {
+ cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+-
++ if (cp->shift > 16) {
++ err = -EINVAL;
++ goto errout;
++ }
++ }
+ if (!cp->hash) {
+ /* Hash not specified, use perfect hash if the upper limit
+ * of the hashing index is below the threshold.
--- /dev/null
+From dd5e073381f2ada3630f36be42833c6e9c78b75e Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 14 Jan 2021 10:19:29 -0800
+Subject: net_sched: gen_estimator: support large ewma log
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit dd5e073381f2ada3630f36be42833c6e9c78b75e upstream.
+
+syzbot's report reminded us that very big ewma_log values were supported
+in the past, even if they made little sense.
+
+tc qdisc replace dev xxx root est 1sec 131072sec ...
+
+While fixing the bug, also add boundary checks for ewma_log, in line
+with range supported by iproute2.
+
+UBSAN: shift-out-of-bounds in net/core/gen_estimator.c:83:38
+shift exponent -1 is negative
+CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.10.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:79 [inline]
+ dump_stack+0x107/0x163 lib/dump_stack.c:120
+ ubsan_epilogue+0xb/0x5a lib/ubsan.c:148
+ __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395
+ est_timer.cold+0xbb/0x12d net/core/gen_estimator.c:83
+ call_timer_fn+0x1a5/0x710 kernel/time/timer.c:1417
+ expire_timers kernel/time/timer.c:1462 [inline]
+ __run_timers.part.0+0x692/0xa80 kernel/time/timer.c:1731
+ __run_timers kernel/time/timer.c:1712 [inline]
+ run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1744
+ __do_softirq+0x2bc/0xa77 kernel/softirq.c:343
+ asm_call_irq_on_stack+0xf/0x20
+ </IRQ>
+ __run_on_irqstack arch/x86/include/asm/irq_stack.h:26 [inline]
+ run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:77 [inline]
+ do_softirq_own_stack+0xaa/0xd0 arch/x86/kernel/irq_64.c:77
+ invoke_softirq kernel/softirq.c:226 [inline]
+ __irq_exit_rcu+0x17f/0x200 kernel/softirq.c:420
+ irq_exit_rcu+0x5/0x20 kernel/softirq.c:432
+ sysvec_apic_timer_interrupt+0x4d/0x100 arch/x86/kernel/apic/apic.c:1096
+ asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:628
+RIP: 0010:native_save_fl arch/x86/include/asm/irqflags.h:29 [inline]
+RIP: 0010:arch_local_save_flags arch/x86/include/asm/irqflags.h:79 [inline]
+RIP: 0010:arch_irqs_disabled arch/x86/include/asm/irqflags.h:169 [inline]
+RIP: 0010:acpi_safe_halt drivers/acpi/processor_idle.c:111 [inline]
+RIP: 0010:acpi_idle_do_entry+0x1c9/0x250 drivers/acpi/processor_idle.c:516
+
+Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20210114181929.1717985-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/gen_estimator.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/core/gen_estimator.c
++++ b/net/core/gen_estimator.c
+@@ -80,11 +80,11 @@ static void est_timer(struct timer_list
+ u64 rate, brate;
+
+ est_fetch_counters(est, &b);
+- brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log);
+- brate -= (est->avbps >> est->ewma_log);
++ brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
++ brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
+
+- rate = (b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
+- rate -= (est->avpps >> est->ewma_log);
++ rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
++ rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
+
+ write_seqcount_begin(&est->seq);
+ est->avbps += brate;
+@@ -143,6 +143,9 @@ int gen_new_estimator(struct gnet_stats_
+ if (parm->interval < -2 || parm->interval > 3)
+ return -EINVAL;
+
++ if (parm->ewma_log == 0 || parm->ewma_log >= 31)
++ return -EINVAL;
++
+ est = kzalloc(sizeof(*est), GFP_KERNEL);
+ if (!est)
+ return -ENOBUFS;
--- /dev/null
+From e4bedf48aaa5552bc1f49703abd17606e7e6e82a Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 14 Jan 2021 08:06:37 -0800
+Subject: net_sched: reject silly cell_log in qdisc_get_rtab()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit e4bedf48aaa5552bc1f49703abd17606e7e6e82a upstream.
+
+iproute2 probably never goes beyond 8 for the cell exponent,
+but stick to the max shift exponent for signed 32bit.
+
+UBSAN reported:
+UBSAN: shift-out-of-bounds in net/sched/sch_api.c:389:22
+shift exponent 130 is too large for 32-bit type 'int'
+CPU: 1 PID: 8450 Comm: syz-executor586 Not tainted 5.11.0-rc3-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:79 [inline]
+ dump_stack+0x183/0x22e lib/dump_stack.c:120
+ ubsan_epilogue lib/ubsan.c:148 [inline]
+ __ubsan_handle_shift_out_of_bounds+0x432/0x4d0 lib/ubsan.c:395
+ __detect_linklayer+0x2a9/0x330 net/sched/sch_api.c:389
+ qdisc_get_rtab+0x2b5/0x410 net/sched/sch_api.c:435
+ cbq_init+0x28f/0x12c0 net/sched/sch_cbq.c:1180
+ qdisc_create+0x801/0x1470 net/sched/sch_api.c:1246
+ tc_modify_qdisc+0x9e3/0x1fc0 net/sched/sch_api.c:1662
+ rtnetlink_rcv_msg+0xb1d/0xe60 net/core/rtnetlink.c:5564
+ netlink_rcv_skb+0x1f0/0x460 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline]
+ netlink_unicast+0x7de/0x9b0 net/netlink/af_netlink.c:1330
+ netlink_sendmsg+0xaa6/0xe90 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:652 [inline]
+ sock_sendmsg net/socket.c:672 [inline]
+ ____sys_sendmsg+0x5a2/0x900 net/socket.c:2345
+ ___sys_sendmsg net/socket.c:2399 [inline]
+ __sys_sendmsg+0x319/0x400 net/socket.c:2432
+ do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Cong Wang <cong.wang@bytedance.com>
+Link: https://lore.kernel.org/r/20210114160637.1660597-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sched/sch_api.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -412,7 +412,8 @@ struct qdisc_rate_table *qdisc_get_rtab(
+ {
+ struct qdisc_rate_table *rtab;
+
+- if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
++ if (tab == NULL || r->rate == 0 ||
++ r->cell_log == 0 || r->cell_log >= 32 ||
+ nla_len(tab) != TC_RTAB_SIZE) {
+ NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
+ return NULL;
cachefiles-drop-superfluous-readpages-aops-null-check.patch
lightnvm-fix-memory-leak-when-submit-fails.patch
skbuff-back-tiny-skbs-with-kmalloc-in-__netdev_alloc_skb-too.patch
+kasan-fix-unaligned-address-is-unhandled-in-kasan_remove_zero_shadow.patch
+kasan-fix-incorrect-arguments-passing-in-kasan_add_zero_shadow.patch
+tcp-fix-tcp-socket-rehash-stats-mis-accounting.patch
+net_sched-gen_estimator-support-large-ewma-log.patch
+udp-mask-tos-bits-in-udp_v4_early_demux.patch
+ipv6-create-multicast-route-with-rtprot_kernel.patch
+net_sched-avoid-shift-out-of-bounds-in-tcindex_set_parms.patch
+net_sched-reject-silly-cell_log-in-qdisc_get_rtab.patch
+ipv6-set-multicast-flag-on-the-multicast-route.patch
+net-mscc-ocelot-allow-offloading-of-bridge-on-top-of-lag.patch
+net-disable-netif_f_hw_tls_rx-when-rxcsum-is-disabled.patch
+net-dsa-b53-fix-an-off-by-one-in-checking-vlan-vid.patch
+tcp-do-not-mess-with-cloned-skbs-in-tcp_add_backlog.patch
+tcp-fix-tcp_user_timeout-with-zero-window.patch
+net-mscc-ocelot-fix-multicast-to-the-cpu-port.patch
+net-core-devlink-use-right-genl-user_ptr-when-handling-port-param-get-set.patch
--- /dev/null
+From b160c28548bc0a87cbd16d5af6d3edcfd70b8c9a Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 19 Jan 2021 08:49:00 -0800
+Subject: tcp: do not mess with cloned skbs in tcp_add_backlog()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit b160c28548bc0a87cbd16d5af6d3edcfd70b8c9a upstream.
+
+Heiner Kallweit reported that some skbs were sent with
+the following invalid GSO properties :
+- gso_size > 0
+- gso_type == 0
+
+This was triggering a WARN_ON_ONCE() in rtl8169_tso_csum_v2.
+
+Juerg Haefliger was able to reproduce a similar issue using
+a lan78xx NIC and a workload mixing TCP incoming traffic
+and forwarded packets.
+
+The problem is that tcp_add_backlog() is writing
+over gso_segs and gso_size even if the incoming packet will not
+be coalesced to the backlog tail packet.
+
+While skb_try_coalesce() would bail out if tail packet is cloned,
+this overwriting would lead to corruptions of other packets
+cooked by lan78xx, sharing a common super-packet.
+
+The strategy used by lan78xx is to use a big skb, and split
+it into all received packets using skb_clone() to avoid copies.
+The drawback of this strategy is that all the small skb share a common
+struct skb_shared_info.
+
+This patch rewrites TCP gso_size/gso_segs handling to only
+happen on the tail skb, since skb_try_coalesce() made sure
+it was not cloned.
+
+Fixes: 4f693b55c3d2 ("tcp: implement coalescing on backlog queue")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Bisected-by: Juerg Haefliger <juergh@canonical.com>
+Tested-by: Juerg Haefliger <juergh@canonical.com>
+Reported-by: Heiner Kallweit <hkallweit1@gmail.com>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=209423
+Link: https://lore.kernel.org/r/20210119164900.766957-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ipv4/tcp_ipv4.c | 25 +++++++++++++------------
+ 1 file changed, 13 insertions(+), 12 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1755,6 +1755,7 @@ int tcp_v4_early_demux(struct sk_buff *s
+ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
+ {
+ u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
++ u32 tail_gso_size, tail_gso_segs;
+ struct skb_shared_info *shinfo;
+ const struct tcphdr *th;
+ struct tcphdr *thtail;
+@@ -1762,6 +1763,7 @@ bool tcp_add_backlog(struct sock *sk, st
+ unsigned int hdrlen;
+ bool fragstolen;
+ u32 gso_segs;
++ u32 gso_size;
+ int delta;
+
+ /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
+@@ -1787,13 +1789,6 @@ bool tcp_add_backlog(struct sock *sk, st
+ */
+ th = (const struct tcphdr *)skb->data;
+ hdrlen = th->doff * 4;
+- shinfo = skb_shinfo(skb);
+-
+- if (!shinfo->gso_size)
+- shinfo->gso_size = skb->len - hdrlen;
+-
+- if (!shinfo->gso_segs)
+- shinfo->gso_segs = 1;
+
+ tail = sk->sk_backlog.tail;
+ if (!tail)
+@@ -1816,6 +1811,15 @@ bool tcp_add_backlog(struct sock *sk, st
+ goto no_coalesce;
+
+ __skb_pull(skb, hdrlen);
++
++ shinfo = skb_shinfo(skb);
++ gso_size = shinfo->gso_size ?: skb->len;
++ gso_segs = shinfo->gso_segs ?: 1;
++
++ shinfo = skb_shinfo(tail);
++ tail_gso_size = shinfo->gso_size ?: (tail->len - hdrlen);
++ tail_gso_segs = shinfo->gso_segs ?: 1;
++
+ if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
+ TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
+
+@@ -1842,11 +1846,8 @@ bool tcp_add_backlog(struct sock *sk, st
+ }
+
+ /* Not as strict as GRO. We only need to carry mss max value */
+- skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
+- skb_shinfo(tail)->gso_size);
+-
+- gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
+- skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
++ shinfo->gso_size = max(gso_size, tail_gso_size);
++ shinfo->gso_segs = min_t(u32, gso_segs + tail_gso_segs, 0xFFFF);
+
+ sk->sk_backlog.len += delta;
+ __NET_INC_STATS(sock_net(sk),
--- /dev/null
+From 9c30ae8398b0813e237bde387d67a7f74ab2db2d Mon Sep 17 00:00:00 2001
+From: Yuchung Cheng <ycheng@google.com>
+Date: Tue, 19 Jan 2021 11:26:19 -0800
+Subject: tcp: fix TCP socket rehash stats mis-accounting
+
+From: Yuchung Cheng <ycheng@google.com>
+
+commit 9c30ae8398b0813e237bde387d67a7f74ab2db2d upstream.
+
+The previous commit 32efcc06d2a1 ("tcp: export count for rehash attempts")
+would mis-account rehashing SNMP and socket stats:
+
+ a. During handshake of an active open, only counts the first
+ SYN timeout
+
+ b. After handshake of passive and active open, stop updating
+ after (roughly) TCP_RETRIES1 recurring RTOs
+
+ c. After the socket aborts, over count timeout_rehash by 1
+
+This patch fixes this by checking the rehash result from sk_rethink_txhash.
+
+Fixes: 32efcc06d2a1 ("tcp: export count for rehash attempts")
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Link: https://lore.kernel.org/r/20210119192619.1848270-1-ycheng@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/net/sock.h | 17 ++++++++++++-----
+ net/ipv4/tcp_input.c | 5 ++---
+ net/ipv4/tcp_timer.c | 22 ++++++++--------------
+ 3 files changed, 22 insertions(+), 22 deletions(-)
+
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1903,10 +1903,13 @@ static inline void sk_set_txhash(struct
+ sk->sk_txhash = net_tx_rndhash();
+ }
+
+-static inline void sk_rethink_txhash(struct sock *sk)
++static inline bool sk_rethink_txhash(struct sock *sk)
+ {
+- if (sk->sk_txhash)
++ if (sk->sk_txhash) {
+ sk_set_txhash(sk);
++ return true;
++ }
++ return false;
+ }
+
+ static inline struct dst_entry *
+@@ -1929,12 +1932,10 @@ sk_dst_get(struct sock *sk)
+ return dst;
+ }
+
+-static inline void dst_negative_advice(struct sock *sk)
++static inline void __dst_negative_advice(struct sock *sk)
+ {
+ struct dst_entry *ndst, *dst = __sk_dst_get(sk);
+
+- sk_rethink_txhash(sk);
+-
+ if (dst && dst->ops->negative_advice) {
+ ndst = dst->ops->negative_advice(dst);
+
+@@ -1946,6 +1947,12 @@ static inline void dst_negative_advice(s
+ }
+ }
+
++static inline void dst_negative_advice(struct sock *sk)
++{
++ sk_rethink_txhash(sk);
++ __dst_negative_advice(sk);
++}
++
+ static inline void
+ __sk_dst_set(struct sock *sk, struct dst_entry *dst)
+ {
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4379,10 +4379,9 @@ static void tcp_rcv_spurious_retrans(str
+ * The receiver remembers and reflects via DSACKs. Leverage the
+ * DSACK state and change the txhash to re-route speculatively.
+ */
+- if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) {
+- sk_rethink_txhash(sk);
++ if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
++ sk_rethink_txhash(sk))
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
+- }
+ }
+
+ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -219,14 +219,8 @@ static int tcp_write_timeout(struct sock
+ int retry_until;
+
+ if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+- if (icsk->icsk_retransmits) {
+- dst_negative_advice(sk);
+- } else {
+- sk_rethink_txhash(sk);
+- tp->timeout_rehash++;
+- __NET_INC_STATS(sock_net(sk),
+- LINUX_MIB_TCPTIMEOUTREHASH);
+- }
++ if (icsk->icsk_retransmits)
++ __dst_negative_advice(sk);
+ retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ expired = icsk->icsk_retransmits >= retry_until;
+ } else {
+@@ -234,12 +228,7 @@ static int tcp_write_timeout(struct sock
+ /* Black hole detection */
+ tcp_mtu_probing(icsk, sk);
+
+- dst_negative_advice(sk);
+- } else {
+- sk_rethink_txhash(sk);
+- tp->timeout_rehash++;
+- __NET_INC_STATS(sock_net(sk),
+- LINUX_MIB_TCPTIMEOUTREHASH);
++ __dst_negative_advice(sk);
+ }
+
+ retry_until = net->ipv4.sysctl_tcp_retries2;
+@@ -270,6 +259,11 @@ static int tcp_write_timeout(struct sock
+ return 1;
+ }
+
++ if (sk_rethink_txhash(sk)) {
++ tp->timeout_rehash++;
++ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH);
++ }
++
+ return 0;
+ }
+
--- /dev/null
+From 9d9b1ee0b2d1c9e02b2338c4a4b0a062d2d3edac Mon Sep 17 00:00:00 2001
+From: Enke Chen <enchen@paloaltonetworks.com>
+Date: Fri, 15 Jan 2021 14:30:58 -0800
+Subject: tcp: fix TCP_USER_TIMEOUT with zero window
+
+From: Enke Chen <enchen@paloaltonetworks.com>
+
+commit 9d9b1ee0b2d1c9e02b2338c4a4b0a062d2d3edac upstream.
+
+The TCP session does not terminate with TCP_USER_TIMEOUT when data
+remain untransmitted due to zero window.
+
+The number of unanswered zero-window probes (tcp_probes_out) is
+reset to zero with incoming acks irrespective of the window size,
+as described in tcp_probe_timer():
+
+ RFC 1122 4.2.2.17 requires the sender to stay open indefinitely
+ as long as the receiver continues to respond probes. We support
+ this by default and reset icsk_probes_out with incoming ACKs.
+
+This counter, however, is the wrong one to be used in calculating the
+duration that the window remains closed and data remain untransmitted.
+Thanks to Jonathan Maxwell <jmaxwell37@gmail.com> for diagnosing the
+actual issue.
+
+In this patch a new timestamp is introduced for the socket in order to
+track the elapsed time for the zero-window probes that have not been
+answered with any non-zero window ack.
+
+Fixes: 9721e709fa68 ("tcp: simplify window probe aborting on USER_TIMEOUT")
+Reported-by: William McCall <william.mccall@gmail.com>
+Co-developed-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Enke Chen <enchen@paloaltonetworks.com>
+Reviewed-by: Yuchung Cheng <ycheng@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20210115223058.GA39267@localhost.localdomain
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/net/inet_connection_sock.h | 3 +++
+ net/ipv4/inet_connection_sock.c | 1 +
+ net/ipv4/tcp.c | 1 +
+ net/ipv4/tcp_input.c | 1 +
+ net/ipv4/tcp_output.c | 1 +
+ net/ipv4/tcp_timer.c | 14 +++++++-------
+ 6 files changed, 14 insertions(+), 7 deletions(-)
+
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -76,6 +76,8 @@ struct inet_connection_sock_af_ops {
+ * @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options)
+ * @icsk_ack: Delayed ACK control data
+ * @icsk_mtup; MTU probing control data
++ * @icsk_probes_tstamp: Probe timestamp (cleared by non-zero window ack)
++ * @icsk_user_timeout: TCP_USER_TIMEOUT value
+ */
+ struct inet_connection_sock {
+ /* inet_sock has to be the first member! */
+@@ -129,6 +131,7 @@ struct inet_connection_sock {
+
+ u32 probe_timestamp;
+ } icsk_mtup;
++ u32 icsk_probes_tstamp;
+ u32 icsk_user_timeout;
+
+ u64 icsk_ca_priv[104 / sizeof(u64)];
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -851,6 +851,7 @@ struct sock *inet_csk_clone_lock(const s
+ newicsk->icsk_retransmits = 0;
+ newicsk->icsk_backoff = 0;
+ newicsk->icsk_probes_out = 0;
++ newicsk->icsk_probes_tstamp = 0;
+
+ /* Deinitialize accept_queue to trap illegal accesses. */
+ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2685,6 +2685,7 @@ int tcp_disconnect(struct sock *sk, int
+
+ icsk->icsk_backoff = 0;
+ icsk->icsk_probes_out = 0;
++ icsk->icsk_probes_tstamp = 0;
+ icsk->icsk_rto = TCP_TIMEOUT_INIT;
+ icsk->icsk_rto_min = TCP_RTO_MIN;
+ icsk->icsk_delack_max = TCP_DELACK_MAX;
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3370,6 +3370,7 @@ static void tcp_ack_probe(struct sock *s
+ return;
+ if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
+ icsk->icsk_backoff = 0;
++ icsk->icsk_probes_tstamp = 0;
+ inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
+ /* Socket must be waked up by subsequent tcp_data_snd_check().
+ * This function is not for random using!
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -4080,6 +4080,7 @@ void tcp_send_probe0(struct sock *sk)
+ /* Cancel probe timer, if it is not required. */
+ icsk->icsk_probes_out = 0;
+ icsk->icsk_backoff = 0;
++ icsk->icsk_probes_tstamp = 0;
+ return;
+ }
+
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -343,6 +343,7 @@ static void tcp_probe_timer(struct sock
+
+ if (tp->packets_out || !skb) {
+ icsk->icsk_probes_out = 0;
++ icsk->icsk_probes_tstamp = 0;
+ return;
+ }
+
+@@ -354,13 +355,12 @@ static void tcp_probe_timer(struct sock
+ * corresponding system limit. We also implement similar policy when
+ * we use RTO to probe window in tcp_retransmit_timer().
+ */
+- if (icsk->icsk_user_timeout) {
+- u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out,
+- tcp_probe0_base(sk));
+-
+- if (elapsed >= icsk->icsk_user_timeout)
+- goto abort;
+- }
++ if (!icsk->icsk_probes_tstamp)
++ icsk->icsk_probes_tstamp = tcp_jiffies32;
++ else if (icsk->icsk_user_timeout &&
++ (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
++ msecs_to_jiffies(icsk->icsk_user_timeout))
++ goto abort;
+
+ max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
+ if (sock_flag(sk, SOCK_DEAD)) {
--- /dev/null
+From 8d2b51b008c25240914984208b2ced57d1dd25a5 Mon Sep 17 00:00:00 2001
+From: Guillaume Nault <gnault@redhat.com>
+Date: Sat, 16 Jan 2021 11:44:22 +0100
+Subject: udp: mask TOS bits in udp_v4_early_demux()
+
+From: Guillaume Nault <gnault@redhat.com>
+
+commit 8d2b51b008c25240914984208b2ced57d1dd25a5 upstream.
+
+udp_v4_early_demux() is the only function that calls
+ip_mc_validate_source() with a TOS that hasn't been masked with
+IPTOS_RT_MASK.
+
+This results in different behaviours for incoming multicast UDPv4
+packets, depending on if ip_mc_validate_source() is called from the
+early-demux path (udp_v4_early_demux) or from the regular input path
+(ip_route_input_noref).
+
+ECN would normally not be used with UDP multicast packets, so the
+practical consequences should be limited on that side. However,
+IPTOS_RT_MASK is also used to mask the TOS' high order bits, to align
+with the non-early-demux path behaviour.
+
+Reproducer:
+
+ Setup two netns, connected with veth:
+ $ ip netns add ns0
+ $ ip netns add ns1
+ $ ip -netns ns0 link set dev lo up
+ $ ip -netns ns1 link set dev lo up
+ $ ip link add name veth01 netns ns0 type veth peer name veth10 netns ns1
+ $ ip -netns ns0 link set dev veth01 up
+ $ ip -netns ns1 link set dev veth10 up
+ $ ip -netns ns0 address add 192.0.2.10 peer 192.0.2.11/32 dev veth01
+ $ ip -netns ns1 address add 192.0.2.11 peer 192.0.2.10/32 dev veth10
+
+ In ns0, add route to multicast address 224.0.2.0/24 using source
+ address 198.51.100.10:
+ $ ip -netns ns0 address add 198.51.100.10/32 dev lo
+ $ ip -netns ns0 route add 224.0.2.0/24 dev veth01 src 198.51.100.10
+
+ In ns1, define route to 198.51.100.10, only for packets with TOS 4:
+ $ ip -netns ns1 route add 198.51.100.10/32 tos 4 dev veth10
+
+ Also activate rp_filter in ns1, so that incoming packets not matching
+ the above route get dropped:
+ $ ip netns exec ns1 sysctl -wq net.ipv4.conf.veth10.rp_filter=1
+
+ Now try to receive packets on 224.0.2.11:
+ $ ip netns exec ns1 socat UDP-RECVFROM:1111,ip-add-membership=224.0.2.11:veth10,ignoreeof -
+
+ In ns0, send packet to 224.0.2.11 with TOS 4 and ECT(0) (that is,
+ tos 6 for socat):
+ $ echo test0 | ip netns exec ns0 socat - UDP-DATAGRAM:224.0.2.11:1111,bind=:1111,tos=6
+
+ The "test0" message is properly received by socat in ns1, because
+ early-demux has no cached dst to use, so source address validation
+ is done by ip_route_input_mc(), which receives a TOS that has the
+ ECN bits masked.
+
+ Now send another packet to 224.0.2.11, still with TOS 4 and ECT(0):
+ $ echo test1 | ip netns exec ns0 socat - UDP-DATAGRAM:224.0.2.11:1111,bind=:1111,tos=6
+
+ The "test1" message isn't received by socat in ns1, because, now,
+ early-demux has a cached dst to use and calls ip_mc_validate_source()
+ immediately, without masking the ECN bits.
+
+Fixes: bc044e8db796 ("udp: perform source validation for mcast early demux")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ipv4/udp.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2553,7 +2553,8 @@ int udp_v4_early_demux(struct sk_buff *s
+ */
+ if (!inet_sk(sk)->inet_daddr && in_dev)
+ return ip_mc_validate_source(skb, iph->daddr,
+- iph->saddr, iph->tos,
++ iph->saddr,
++ iph->tos & IPTOS_RT_MASK,
+ skb->dev, in_dev, &itag);
+ }
+ return 0;