From 53a232f7ac307775353562c43bfb0884d5216edc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 May 2020 19:15:16 +0200 Subject: [PATCH] 5.6-stable patches added patches: __netif_receive_skb_core-pass-skb-by-reference.patch ax25-fix-setsockopt-so_bindtodevice.patch dpaa_eth-fix-usage-as-dsa-master-try-3.patch ethtool-count-header-size-in-reply-size-estimate.patch felix-fix-initialization-of-ioremap-resources.patch net-don-t-return-invalid-table-id-error-when-we-fall-back-to-pf_unspec.patch net-dsa-mt7530-fix-roaming-from-dsa-user-ports.patch net-ethernet-ti-cpsw-fix-assert_rtnl-warning-during-suspend.patch net-inet_csk-fix-so_reuseport-bind-address-cache-in-tb-fast.patch net-ipip-fix-wrong-address-family-in-init-error-path.patch net-mlx5-add-command-entry-handling-completion.patch net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch net-mlx5-fix-cleaning-unmanaged-flow-tables.patch net-mlx5-fix-error-flow-in-case-of-function_setup-failure.patch net-mlx5-fix-memory-leak-in-mlx5_events_init.patch net-mlx5e-fix-inner-tirs-handling.patch net-mlx5e-ktls-destroy-key-object-after-destroying-the-tis.patch net-mlx5e-update-netdev-txq-on-completions-during-closure.patch net-mvpp2-fix-rx-hashing-for-non-10g-ports.patch net-nlmsg_cancel-if-put-fails-for-nhmsg.patch net-qrtr-fix-passing-invalid-reference-to-qrtr_local_enqueue.patch net-revert-net-get-rid-of-an-signed-integer-overflow-in-ip_idents_reserve.patch net-sched-fix-reporting-the-first-time-use-timestamp.patch net-tls-fix-race-condition-causing-kernel-panic.patch nexthop-fix-attribute-checking-for-groups.patch r8152-support-additional-microsoft-surface-ethernet-adapter-variant.patch r8169-fix-ocp-access-on-rtl8117.patch sctp-don-t-add-the-shutdown-timer-if-its-already-been-added.patch sctp-start-shutdown-on-association-restart-if-in-shutdown-sent-state-and-socket-is-closed.patch tipc-block-bh-before-using-dst_cache.patch wireguard-noise-read-preshared-key-while-taking-lock.patch 
wireguard-noise-separate-receive-counter-from-send-counter.patch wireguard-queueing-preserve-flow-hash-across-packet-scrubbing.patch --- ...ceive_skb_core-pass-skb-by-reference.patch | 91 +++++ .../ax25-fix-setsockopt-so_bindtodevice.patch | 72 ++++ ...aa_eth-fix-usage-as-dsa-master-try-3.patch | 73 ++++ ...t-header-size-in-reply-size-estimate.patch | 55 +++ ...-initialization-of-ioremap-resources.patch | 168 +++++++++ ...error-when-we-fall-back-to-pf_unspec.patch | 119 +++++++ ...7530-fix-roaming-from-dsa-user-ports.patch | 121 +++++++ ...x-assert_rtnl-warning-during-suspend.patch | 47 +++ ...seport-bind-address-cache-in-tb-fast.patch | 163 +++++++++ ...ng-address-family-in-init-error-path.patch | 31 ++ ...dd-command-entry-handling-completion.patch | 96 +++++ ...ing-command-interface-to-events-mode.patch | 164 +++++++++ ...5-fix-cleaning-unmanaged-flow-tables.patch | 54 +++ ...ow-in-case-of-function_setup-failure.patch | 41 +++ ...-fix-memory-leak-in-mlx5_events_init.patch | 36 ++ .../net-mlx5e-fix-inner-tirs-handling.patch | 129 +++++++ ...-key-object-after-destroying-the-tis.patch | 36 ++ ...ev-txq-on-completions-during-closure.patch | 54 +++ ...pp2-fix-rx-hashing-for-non-10g-ports.patch | 187 ++++++++++ ...-nlmsg_cancel-if-put-fails-for-nhmsg.patch | 195 ++++++++++ ...alid-reference-to-qrtr_local_enqueue.patch | 38 ++ ...nteger-overflow-in-ip_idents_reserve.patch | 66 ++++ ...porting-the-first-time-use-timestamp.patch | 37 ++ ...-race-condition-causing-kernel-panic.patch | 182 ++++++++++ ...op-fix-attribute-checking-for-groups.patch | 35 ++ ...oft-surface-ethernet-adapter-variant.patch | 60 ++++ .../r8169-fix-ocp-access-on-rtl8117.patch | 63 ++++ ...down-timer-if-its-already-been-added.patch | 81 +++++ ...down-sent-state-and-socket-is-closed.patch | 69 ++++ queue-5.6/series | 33 ++ ...tipc-block-bh-before-using-dst_cache.patch | 84 +++++ ...read-preshared-key-while-taking-lock.patch | 63 ++++ ...te-receive-counter-from-send-counter.patch | 332 
++++++++++++++++++ ...ve-flow-hash-across-packet-scrubbing.patch | 114 ++++++ 34 files changed, 3189 insertions(+) create mode 100644 queue-5.6/__netif_receive_skb_core-pass-skb-by-reference.patch create mode 100644 queue-5.6/ax25-fix-setsockopt-so_bindtodevice.patch create mode 100644 queue-5.6/dpaa_eth-fix-usage-as-dsa-master-try-3.patch create mode 100644 queue-5.6/ethtool-count-header-size-in-reply-size-estimate.patch create mode 100644 queue-5.6/felix-fix-initialization-of-ioremap-resources.patch create mode 100644 queue-5.6/net-don-t-return-invalid-table-id-error-when-we-fall-back-to-pf_unspec.patch create mode 100644 queue-5.6/net-dsa-mt7530-fix-roaming-from-dsa-user-ports.patch create mode 100644 queue-5.6/net-ethernet-ti-cpsw-fix-assert_rtnl-warning-during-suspend.patch create mode 100644 queue-5.6/net-inet_csk-fix-so_reuseport-bind-address-cache-in-tb-fast.patch create mode 100644 queue-5.6/net-ipip-fix-wrong-address-family-in-init-error-path.patch create mode 100644 queue-5.6/net-mlx5-add-command-entry-handling-completion.patch create mode 100644 queue-5.6/net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch create mode 100644 queue-5.6/net-mlx5-fix-cleaning-unmanaged-flow-tables.patch create mode 100644 queue-5.6/net-mlx5-fix-error-flow-in-case-of-function_setup-failure.patch create mode 100644 queue-5.6/net-mlx5-fix-memory-leak-in-mlx5_events_init.patch create mode 100644 queue-5.6/net-mlx5e-fix-inner-tirs-handling.patch create mode 100644 queue-5.6/net-mlx5e-ktls-destroy-key-object-after-destroying-the-tis.patch create mode 100644 queue-5.6/net-mlx5e-update-netdev-txq-on-completions-during-closure.patch create mode 100644 queue-5.6/net-mvpp2-fix-rx-hashing-for-non-10g-ports.patch create mode 100644 queue-5.6/net-nlmsg_cancel-if-put-fails-for-nhmsg.patch create mode 100644 queue-5.6/net-qrtr-fix-passing-invalid-reference-to-qrtr_local_enqueue.patch create mode 100644 
queue-5.6/net-revert-net-get-rid-of-an-signed-integer-overflow-in-ip_idents_reserve.patch create mode 100644 queue-5.6/net-sched-fix-reporting-the-first-time-use-timestamp.patch create mode 100644 queue-5.6/net-tls-fix-race-condition-causing-kernel-panic.patch create mode 100644 queue-5.6/nexthop-fix-attribute-checking-for-groups.patch create mode 100644 queue-5.6/r8152-support-additional-microsoft-surface-ethernet-adapter-variant.patch create mode 100644 queue-5.6/r8169-fix-ocp-access-on-rtl8117.patch create mode 100644 queue-5.6/sctp-don-t-add-the-shutdown-timer-if-its-already-been-added.patch create mode 100644 queue-5.6/sctp-start-shutdown-on-association-restart-if-in-shutdown-sent-state-and-socket-is-closed.patch create mode 100644 queue-5.6/series create mode 100644 queue-5.6/tipc-block-bh-before-using-dst_cache.patch create mode 100644 queue-5.6/wireguard-noise-read-preshared-key-while-taking-lock.patch create mode 100644 queue-5.6/wireguard-noise-separate-receive-counter-from-send-counter.patch create mode 100644 queue-5.6/wireguard-queueing-preserve-flow-hash-across-packet-scrubbing.patch diff --git a/queue-5.6/__netif_receive_skb_core-pass-skb-by-reference.patch b/queue-5.6/__netif_receive_skb_core-pass-skb-by-reference.patch new file mode 100644 index 00000000000..03f578aac18 --- /dev/null +++ b/queue-5.6/__netif_receive_skb_core-pass-skb-by-reference.patch @@ -0,0 +1,91 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Boris Sukholitko +Date: Tue, 19 May 2020 10:32:37 +0300 +Subject: __netif_receive_skb_core: pass skb by reference + +From: Boris Sukholitko + +[ Upstream commit c0bbbdc32febd4f034ecbf3ea17865785b2c0652 ] + +__netif_receive_skb_core may change the skb pointer passed into it (e.g. +in rx_handler). The original skb may be freed as a result of this +operation. + +The callers of __netif_receive_skb_core may further process original skb +by using pt_prev pointer returned by __netif_receive_skb_core thus +leading to unpleasant effects. 
+ +The solution is to pass skb by reference into __netif_receive_skb_core. + +v2: Added Fixes tag and comment regarding ppt_prev and skb invariant. + +Fixes: 88eb1944e18c ("net: core: propagate SKB lists through packet_type lookup") +Signed-off-by: Boris Sukholitko +Acked-by: Edward Cree +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4988,11 +4988,12 @@ static inline int nf_ingress(struct sk_b + return 0; + } + +-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc, ++static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, + struct packet_type **ppt_prev) + { + struct packet_type *ptype, *pt_prev; + rx_handler_func_t *rx_handler; ++ struct sk_buff *skb = *pskb; + struct net_device *orig_dev; + bool deliver_exact = false; + int ret = NET_RX_DROP; +@@ -5023,8 +5024,10 @@ another_round: + ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); + preempt_enable(); + +- if (ret2 != XDP_PASS) +- return NET_RX_DROP; ++ if (ret2 != XDP_PASS) { ++ ret = NET_RX_DROP; ++ goto out; ++ } + skb_reset_mac_len(skb); + } + +@@ -5174,6 +5177,13 @@ drop: + } + + out: ++ /* The invariant here is that if *ppt_prev is not NULL ++ * then skb should also be non-NULL. ++ * ++ * Apparently *ppt_prev assignment above holds this invariant due to ++ * skb dereferencing near it. 
++ */ ++ *pskb = skb; + return ret; + } + +@@ -5183,7 +5193,7 @@ static int __netif_receive_skb_one_core( + struct packet_type *pt_prev = NULL; + int ret; + +- ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); ++ ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); + if (pt_prev) + ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, + skb->dev, pt_prev, orig_dev); +@@ -5261,7 +5271,7 @@ static void __netif_receive_skb_list_cor + struct packet_type *pt_prev = NULL; + + skb_list_del_init(skb); +- __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); ++ __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); + if (!pt_prev) + continue; + if (pt_curr != pt_prev || od_curr != orig_dev) { diff --git a/queue-5.6/ax25-fix-setsockopt-so_bindtodevice.patch b/queue-5.6/ax25-fix-setsockopt-so_bindtodevice.patch new file mode 100644 index 00000000000..1b3b5b21d2d --- /dev/null +++ b/queue-5.6/ax25-fix-setsockopt-so_bindtodevice.patch @@ -0,0 +1,72 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Eric Dumazet +Date: Tue, 19 May 2020 18:24:43 -0700 +Subject: ax25: fix setsockopt(SO_BINDTODEVICE) + +From: Eric Dumazet + +[ Upstream commit 687775cec056b38a4c8f3291e0dd7a9145f7b667 ] + +syzbot was able to trigger this trace [1], probably by using +a zero optlen. + +While we are at it, cap optlen to IFNAMSIZ - 1 instead of IFNAMSIZ. 
+ +[1] +BUG: KMSAN: uninit-value in strnlen+0xf9/0x170 lib/string.c:569 +CPU: 0 PID: 8807 Comm: syz-executor483 Not tainted 5.7.0-rc4-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x220 lib/dump_stack.c:118 + kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:121 + __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 + strnlen+0xf9/0x170 lib/string.c:569 + dev_name_hash net/core/dev.c:207 [inline] + netdev_name_node_lookup net/core/dev.c:277 [inline] + __dev_get_by_name+0x75/0x2b0 net/core/dev.c:778 + ax25_setsockopt+0xfa3/0x1170 net/ax25/af_ax25.c:654 + __compat_sys_setsockopt+0x4ed/0x910 net/compat.c:403 + __do_compat_sys_setsockopt net/compat.c:413 [inline] + __se_compat_sys_setsockopt+0xdd/0x100 net/compat.c:410 + __ia32_compat_sys_setsockopt+0x62/0x80 net/compat.c:410 + do_syscall_32_irqs_on arch/x86/entry/common.c:339 [inline] + do_fast_syscall_32+0x3bf/0x6d0 arch/x86/entry/common.c:398 + entry_SYSENTER_compat+0x68/0x77 arch/x86/entry/entry_64_compat.S:139 +RIP: 0023:0xf7f57dd9 +Code: 90 e8 0b 00 00 00 f3 90 0f ae e8 eb f9 8d 74 26 00 89 3c 24 c3 90 90 90 90 90 90 90 90 90 90 90 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90 +RSP: 002b:00000000ffae8c1c EFLAGS: 00000217 ORIG_RAX: 000000000000016e +RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000000101 +RDX: 0000000000000019 RSI: 0000000020000000 RDI: 0000000000000004 +RBP: 0000000000000012 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 +R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 + +Local variable ----devname@ax25_setsockopt created at: + ax25_setsockopt+0xe6/0x1170 net/ax25/af_ax25.c:536 + ax25_setsockopt+0xe6/0x1170 net/ax25/af_ax25.c:536 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Reported-by: syzbot 
+Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ax25/af_ax25.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/ax25/af_ax25.c ++++ b/net/ax25/af_ax25.c +@@ -635,8 +635,10 @@ static int ax25_setsockopt(struct socket + break; + + case SO_BINDTODEVICE: +- if (optlen > IFNAMSIZ) +- optlen = IFNAMSIZ; ++ if (optlen > IFNAMSIZ - 1) ++ optlen = IFNAMSIZ - 1; ++ ++ memset(devname, 0, sizeof(devname)); + + if (copy_from_user(devname, optval, optlen)) { + res = -EFAULT; diff --git a/queue-5.6/dpaa_eth-fix-usage-as-dsa-master-try-3.patch b/queue-5.6/dpaa_eth-fix-usage-as-dsa-master-try-3.patch new file mode 100644 index 00000000000..12765cba8bf --- /dev/null +++ b/queue-5.6/dpaa_eth-fix-usage-as-dsa-master-try-3.patch @@ -0,0 +1,73 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Vladimir Oltean +Date: Mon, 25 May 2020 00:22:51 +0300 +Subject: dpaa_eth: fix usage as DSA master, try 3 + +From: Vladimir Oltean + +[ Upstream commit 5d14c304bfc14b4fd052dc83d5224376b48f52f0 ] + +The dpaa-eth driver probes on compatible string for the MAC node, and +the fman/mac.c driver allocates a dpaa-ethernet platform device that +triggers the probing of the dpaa-eth net device driver. + +All of this is fine, but the problem is that the struct device of the +dpaa_eth net_device is 2 parents away from the MAC which can be +referenced via of_node. So of_find_net_device_by_node can't find it, and +DSA switches won't be able to probe on top of FMan ports. + +It would be a bit silly to modify a core function +(of_find_net_device_by_node) to look for dev->parent->parent->of_node +just for one driver. We're just 1 step away from implementing full +recursion. 
+ +Actually there have already been at least 2 previous attempts to make +this work: +- Commit a1a50c8e4c24 ("fsl/man: Inherit parent device and of_node") +- One or more of the patches in "[v3,0/6] adapt DPAA drivers for DSA": + https://patchwork.ozlabs.org/project/netdev/cover/1508178970-28945-1-git-send-email-madalin.bucur@nxp.com/ + (I couldn't really figure out which one was supposed to solve the + problem and how). + +Point being, it looks like this is still pretty much a problem today. +On T1040, the /sys/class/net/eth0 symlink currently points to + +../../devices/platform/ffe000000.soc/ffe400000.fman/ffe4e6000.ethernet/dpaa-ethernet.0/net/eth0 + +which pretty much illustrates the problem. The closest of_node we've got +is the "fsl,fman-memac" at /soc@ffe000000/fman@400000/ethernet@e6000, +which is what we'd like to be able to reference from DSA as host port. + +For of_find_net_device_by_node to find the eth0 port, we would need the +parent of the eth0 net_device to not be the "dpaa-ethernet" platform +device, but to point 1 level higher, aka the "fsl,fman-memac" node +directly. The new sysfs path would look like this: + +../../devices/platform/ffe000000.soc/ffe400000.fman/ffe4e6000.ethernet/net/eth0 + +And this is exactly what SET_NETDEV_DEV does. It sets the parent of the +net_device. The new parent has an of_node associated with it, and +of_dev_node_match already checks for the of_node of the device or of its +parent. + +Fixes: a1a50c8e4c24 ("fsl/man: Inherit parent device and of_node") +Fixes: c6e26ea8c893 ("dpaa_eth: change device used") +Signed-off-by: Vladimir Oltean +Reviewed-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c ++++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +@@ -2902,7 +2902,7 @@ static int dpaa_eth_probe(struct platfor + } + + /* Do this here, so we can be verbose early */ +- SET_NETDEV_DEV(net_dev, dev); ++ SET_NETDEV_DEV(net_dev, dev->parent); + dev_set_drvdata(dev, net_dev); + + priv = netdev_priv(net_dev); diff --git a/queue-5.6/ethtool-count-header-size-in-reply-size-estimate.patch b/queue-5.6/ethtool-count-header-size-in-reply-size-estimate.patch new file mode 100644 index 00000000000..24339d83cd1 --- /dev/null +++ b/queue-5.6/ethtool-count-header-size-in-reply-size-estimate.patch @@ -0,0 +1,55 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Michal Kubecek +Date: Sun, 10 May 2020 21:04:09 +0200 +Subject: ethtool: count header size in reply size estimate + +From: Michal Kubecek + +[ Upstream commit 7c87e32d2e380228ada79d20ac5b7674718ef097 ] + +As ethnl_request_ops::reply_size handlers do not include common header +size into calculated/estimated reply size, it needs to be added in +ethnl_default_doit() and ethnl_default_notify() before allocating the +message. On the other hand, strset_reply_size() should not add common +header size. + +Fixes: 728480f12442 ("ethtool: default handlers for GET requests") +Reported-by: Oleksij Rempel +Signed-off-by: Michal Kubecek +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ethtool/netlink.c | 4 ++-- + net/ethtool/strset.c | 1 - + 2 files changed, 2 insertions(+), 3 deletions(-) + +--- a/net/ethtool/netlink.c ++++ b/net/ethtool/netlink.c +@@ -334,7 +334,7 @@ static int ethnl_default_doit(struct sk_ + ret = ops->reply_size(req_info, reply_data); + if (ret < 0) + goto err_cleanup; +- reply_len = ret; ++ reply_len = ret + ethnl_reply_header_size(); + ret = -ENOMEM; + rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd, + ops->hdr_attr, info, &reply_payload); +@@ -573,7 +573,7 @@ static void ethnl_default_notify(struct + ret = ops->reply_size(req_info, reply_data); + if (ret < 0) + goto err_cleanup; +- reply_len = ret; ++ reply_len = ret + ethnl_reply_header_size(); + ret = -ENOMEM; + skb = genlmsg_new(reply_len, GFP_KERNEL); + if (!skb) +--- a/net/ethtool/strset.c ++++ b/net/ethtool/strset.c +@@ -309,7 +309,6 @@ static int strset_reply_size(const struc + int len = 0; + int ret; + +- len += ethnl_reply_header_size(); + for (i = 0; i < ETH_SS_COUNT; i++) { + const struct strset_info *set_info = &data->sets[i]; + diff --git a/queue-5.6/felix-fix-initialization-of-ioremap-resources.patch b/queue-5.6/felix-fix-initialization-of-ioremap-resources.patch new file mode 100644 index 00000000000..876c4ff2e56 --- /dev/null +++ b/queue-5.6/felix-fix-initialization-of-ioremap-resources.patch @@ -0,0 +1,168 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Claudiu Manoil +Date: Fri, 22 May 2020 11:54:34 +0300 +Subject: felix: Fix initialization of ioremap resources + +From: Claudiu Manoil + +[ Upstream commit b4024c9e5c57902155d3b5e7de482e245f492bff ] + +The caller of devm_ioremap_resource(), either accidentally +or by wrong assumption, is writing back derived resource data +to global static resource initialization tables that should +have been constant. Meaning that after it computes the final +physical start address it saves the address for no reason +in the static tables. 
This doesn't affect the first driver +probing after reboot, but it breaks consecutive driver reloads +(i.e. driver unbind & bind) because the initialization tables +no longer have the correct initial values. So the next probe() +will map the device registers to wrong physical addresses, +causing ARM SError async exceptions. +This patch fixes all of the above. + +Fixes: 56051948773e ("net: dsa: ocelot: add driver for Felix switch family") +Signed-off-by: Claudiu Manoil +Reviewed-by: Vladimir Oltean +Tested-by: Vladimir Oltean +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/ocelot/felix.c | 23 +++++++++++------------ + drivers/net/dsa/ocelot/felix.h | 6 +++--- + drivers/net/dsa/ocelot/felix_vsc9959.c | 22 ++++++++++------------ + 3 files changed, 24 insertions(+), 27 deletions(-) + +--- a/drivers/net/dsa/ocelot/felix.c ++++ b/drivers/net/dsa/ocelot/felix.c +@@ -385,6 +385,7 @@ static int felix_init_structs(struct fel + struct ocelot *ocelot = &felix->ocelot; + phy_interface_t *port_phy_modes; + resource_size_t switch_base; ++ struct resource res; + int port, i, err; + + ocelot->num_phys_ports = num_phys_ports; +@@ -416,17 +417,16 @@ static int felix_init_structs(struct fel + + for (i = 0; i < TARGET_MAX; i++) { + struct regmap *target; +- struct resource *res; + + if (!felix->info->target_io_res[i].name) + continue; + +- res = &felix->info->target_io_res[i]; +- res->flags = IORESOURCE_MEM; +- res->start += switch_base; +- res->end += switch_base; ++ memcpy(&res, &felix->info->target_io_res[i], sizeof(res)); ++ res.flags = IORESOURCE_MEM; ++ res.start += switch_base; ++ res.end += switch_base; + +- target = ocelot_regmap_init(ocelot, res); ++ target = ocelot_regmap_init(ocelot, &res); + if (IS_ERR(target)) { + dev_err(ocelot->dev, + "Failed to map device memory space\n"); +@@ -447,7 +447,6 @@ static int felix_init_structs(struct fel + for (port = 0; port < num_phys_ports; port++) { + struct ocelot_port *ocelot_port; + void 
__iomem *port_regs; +- struct resource *res; + + ocelot_port = devm_kzalloc(ocelot->dev, + sizeof(struct ocelot_port), +@@ -459,12 +458,12 @@ static int felix_init_structs(struct fel + return -ENOMEM; + } + +- res = &felix->info->port_io_res[port]; +- res->flags = IORESOURCE_MEM; +- res->start += switch_base; +- res->end += switch_base; ++ memcpy(&res, &felix->info->port_io_res[port], sizeof(res)); ++ res.flags = IORESOURCE_MEM; ++ res.start += switch_base; ++ res.end += switch_base; + +- port_regs = devm_ioremap_resource(ocelot->dev, res); ++ port_regs = devm_ioremap_resource(ocelot->dev, &res); + if (IS_ERR(port_regs)) { + dev_err(ocelot->dev, + "failed to map registers for port %d\n", port); +--- a/drivers/net/dsa/ocelot/felix.h ++++ b/drivers/net/dsa/ocelot/felix.h +@@ -8,9 +8,9 @@ + + /* Platform-specific information */ + struct felix_info { +- struct resource *target_io_res; +- struct resource *port_io_res; +- struct resource *imdio_res; ++ const struct resource *target_io_res; ++ const struct resource *port_io_res; ++ const struct resource *imdio_res; + const struct reg_field *regfields; + const u32 *const *map; + const struct ocelot_ops *ops; +--- a/drivers/net/dsa/ocelot/felix_vsc9959.c ++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c +@@ -328,10 +328,8 @@ static const u32 *vsc9959_regmap[] = { + [GCB] = vsc9959_gcb_regmap, + }; + +-/* Addresses are relative to the PCI device's base address and +- * will be fixed up at ioremap time. 
+- */ +-static struct resource vsc9959_target_io_res[] = { ++/* Addresses are relative to the PCI device's base address */ ++static const struct resource vsc9959_target_io_res[] = { + [ANA] = { + .start = 0x0280000, + .end = 0x028ffff, +@@ -374,7 +372,7 @@ static struct resource vsc9959_target_io + }, + }; + +-static struct resource vsc9959_port_io_res[] = { ++static const struct resource vsc9959_port_io_res[] = { + { + .start = 0x0100000, + .end = 0x010ffff, +@@ -410,7 +408,7 @@ static struct resource vsc9959_port_io_r + /* Port MAC 0 Internal MDIO bus through which the SerDes acting as an + * SGMII/QSGMII MAC PCS can be found. + */ +-static struct resource vsc9959_imdio_res = { ++static const struct resource vsc9959_imdio_res = { + .start = 0x8030, + .end = 0x8040, + .name = "imdio", +@@ -984,7 +982,7 @@ static int vsc9959_mdio_bus_alloc(struct + struct device *dev = ocelot->dev; + resource_size_t imdio_base; + void __iomem *imdio_regs; +- struct resource *res; ++ struct resource res; + struct enetc_hw *hw; + struct mii_bus *bus; + int port; +@@ -1001,12 +999,12 @@ static int vsc9959_mdio_bus_alloc(struct + imdio_base = pci_resource_start(felix->pdev, + felix->info->imdio_pci_bar); + +- res = felix->info->imdio_res; +- res->flags = IORESOURCE_MEM; +- res->start += imdio_base; +- res->end += imdio_base; ++ memcpy(&res, felix->info->imdio_res, sizeof(res)); ++ res.flags = IORESOURCE_MEM; ++ res.start += imdio_base; ++ res.end += imdio_base; + +- imdio_regs = devm_ioremap_resource(dev, res); ++ imdio_regs = devm_ioremap_resource(dev, &res); + if (IS_ERR(imdio_regs)) { + dev_err(dev, "failed to map internal MDIO registers\n"); + return PTR_ERR(imdio_regs); diff --git a/queue-5.6/net-don-t-return-invalid-table-id-error-when-we-fall-back-to-pf_unspec.patch b/queue-5.6/net-don-t-return-invalid-table-id-error-when-we-fall-back-to-pf_unspec.patch new file mode 100644 index 00000000000..81196e32962 --- /dev/null +++ 
b/queue-5.6/net-don-t-return-invalid-table-id-error-when-we-fall-back-to-pf_unspec.patch @@ -0,0 +1,119 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Sabrina Dubroca +Date: Wed, 20 May 2020 11:15:46 +0200 +Subject: net: don't return invalid table id error when we fall back to PF_UNSPEC + +From: Sabrina Dubroca + +[ Upstream commit 41b4bd986f86331efc599b9a3f5fb86ad92e9af9 ] + +In case we can't find a ->dumpit callback for the requested +(family,type) pair, we fall back to (PF_UNSPEC,type). In effect, we're +in the same situation as if userspace had requested a PF_UNSPEC +dump. For RTM_GETROUTE, that handler is rtnl_dump_all, which calls all +the registered RTM_GETROUTE handlers. + +The requested table id may or may not exist for all of those +families. commit ae677bbb4441 ("net: Don't return invalid table id +error when dumping all families") fixed the problem when userspace +explicitly requests a PF_UNSPEC dump, but missed the fallback case. + +For example, when we pass ipv6.disable=1 to a kernel with +CONFIG_IP_MROUTE=y and CONFIG_IP_MROUTE_MULTIPLE_TABLES=y, +the (PF_INET6, RTM_GETROUTE) handler isn't registered, so we end up in +rtnl_dump_all, and listing IPv6 routes will unexpectedly print: + + # ip -6 r + Error: ipv4: MR table does not exist. + Dump terminated + +commit ae677bbb4441 introduced the dump_all_families variable, which +gets set when userspace requests a PF_UNSPEC dump. However, we can't +simply set the family to PF_UNSPEC in rtnetlink_rcv_msg in the +fallback case to get dump_all_families == true, because some messages +types (for example RTM_GETRULE and RTM_GETNEIGH) only register the +PF_UNSPEC handler and use the family to filter in the kernel what is +dumped to userspace. We would then export more entries, that userspace +would have to filter. iproute does that, but other programs may not. 
+ +Instead, this patch removes dump_all_families and updates the +RTM_GETROUTE handlers to check if the family that is being dumped is +their own. When it's not, which covers both the intentional PF_UNSPEC +dumps (as dump_all_families did) and the fallback case, ignore the +missing table id error. + +Fixes: cb167893f41e ("net: Plumb support for filtering ipv4 and ipv6 multicast route dumps") +Signed-off-by: Sabrina Dubroca +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip_fib.h | 1 - + net/ipv4/fib_frontend.c | 3 +-- + net/ipv4/ipmr.c | 2 +- + net/ipv6/ip6_fib.c | 2 +- + net/ipv6/ip6mr.c | 2 +- + 5 files changed, 4 insertions(+), 6 deletions(-) + +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -257,7 +257,6 @@ struct fib_dump_filter { + u32 table_id; + /* filter_set is an optimization that an entry is set */ + bool filter_set; +- bool dump_all_families; + bool dump_routes; + bool dump_exceptions; + unsigned char protocol; +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -918,7 +918,6 @@ int ip_valid_fib_dump_req(struct net *ne + else + filter->dump_exceptions = false; + +- filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC); + filter->flags = rtm->rtm_flags; + filter->protocol = rtm->rtm_protocol; + filter->rt_type = rtm->rtm_type; +@@ -990,7 +989,7 @@ static int inet_dump_fib(struct sk_buff + if (filter.table_id) { + tb = fib_get_table(net, filter.table_id); + if (!tb) { +- if (filter.dump_all_families) ++ if (rtnl_msg_family(cb->nlh) != PF_INET) + return skb->len; + + NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); +--- a/net/ipv4/ipmr.c ++++ b/net/ipv4/ipmr.c +@@ -2611,7 +2611,7 @@ static int ipmr_rtm_dumproute(struct sk_ + + mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); + if (!mrt) { +- if (filter.dump_all_families) ++ if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) + return skb->len; + + NL_SET_ERR_MSG(cb->extack, "ipv4: MR table 
does not exist"); +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -664,7 +664,7 @@ static int inet6_dump_fib(struct sk_buff + if (arg.filter.table_id) { + tb = fib6_get_table(net, arg.filter.table_id); + if (!tb) { +- if (arg.filter.dump_all_families) ++ if (rtnl_msg_family(cb->nlh) != PF_INET6) + goto out; + + NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); +--- a/net/ipv6/ip6mr.c ++++ b/net/ipv6/ip6mr.c +@@ -2501,7 +2501,7 @@ static int ip6mr_rtm_dumproute(struct sk + + mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); + if (!mrt) { +- if (filter.dump_all_families) ++ if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) + return skb->len; + + NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); diff --git a/queue-5.6/net-dsa-mt7530-fix-roaming-from-dsa-user-ports.patch b/queue-5.6/net-dsa-mt7530-fix-roaming-from-dsa-user-ports.patch new file mode 100644 index 00000000000..68a3f8b7dbd --- /dev/null +++ b/queue-5.6/net-dsa-mt7530-fix-roaming-from-dsa-user-ports.patch @@ -0,0 +1,121 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: DENG Qingfang +Date: Wed, 13 May 2020 23:10:16 +0800 +Subject: net: dsa: mt7530: fix roaming from DSA user ports + +From: DENG Qingfang + +[ Upstream commit 5e5502e012b8129e11be616acb0f9c34bc8f8adb ] + +When a client moves from a DSA user port to a software port in a bridge, +it cannot reach any other clients that connected to the DSA user ports. +That is because SA learning on the CPU port is disabled, so the switch +ignores the client's frames from the CPU port and still thinks it is at +the user port. + +Fix it by enabling SA learning on the CPU port. + +To prevent the switch from learning from flooding frames from the CPU +port, set skb->offload_fwd_mark to 1 for unicast and broadcast frames, +and let the switch flood them instead of trapping to the CPU port. 
+Multicast frames still need to be trapped to the CPU port for snooping, +so set the SA_DIS bit of the MTK tag to 1 when transmitting those frames +to disable SA learning. + +Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") +Signed-off-by: DENG Qingfang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/mt7530.c | 9 ++------- + drivers/net/dsa/mt7530.h | 1 + + net/dsa/tag_mtk.c | 15 +++++++++++++++ + 3 files changed, 18 insertions(+), 7 deletions(-) + +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -639,11 +639,8 @@ mt7530_cpu_port_enable(struct mt7530_pri + mt7530_write(priv, MT7530_PVC_P(port), + PORT_SPEC_TAG); + +- /* Disable auto learning on the cpu port */ +- mt7530_set(priv, MT7530_PSC_P(port), SA_DIS); +- +- /* Unknown unicast frame fordwarding to the cpu port */ +- mt7530_set(priv, MT7530_MFC, UNU_FFP(BIT(port))); ++ /* Unknown multicast frame forwarding to the cpu port */ ++ mt7530_rmw(priv, MT7530_MFC, UNM_FFP_MASK, UNM_FFP(BIT(port))); + + /* Set CPU port number */ + if (priv->id == ID_MT7621) +@@ -1247,8 +1244,6 @@ mt7530_setup(struct dsa_switch *ds) + /* Enable and reset MIB counters */ + mt7530_mib_reset(ds); + +- mt7530_clear(priv, MT7530_MFC, UNU_FFP_MASK); +- + for (i = 0; i < MT7530_NUM_PORTS; i++) { + /* Disable forwarding by default on all ports */ + mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, +--- a/drivers/net/dsa/mt7530.h ++++ b/drivers/net/dsa/mt7530.h +@@ -31,6 +31,7 @@ enum { + #define MT7530_MFC 0x10 + #define BC_FFP(x) (((x) & 0xff) << 24) + #define UNM_FFP(x) (((x) & 0xff) << 16) ++#define UNM_FFP_MASK UNM_FFP(~0) + #define UNU_FFP(x) (((x) & 0xff) << 8) + #define UNU_FFP_MASK UNU_FFP(~0) + #define CPU_EN BIT(7) +--- a/net/dsa/tag_mtk.c ++++ b/net/dsa/tag_mtk.c +@@ -15,6 +15,7 @@ + #define MTK_HDR_XMIT_TAGGED_TPID_8100 1 + #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) + #define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) ++#define 
MTK_HDR_XMIT_SA_DIS BIT(6) + + static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +@@ -22,6 +23,9 @@ static struct sk_buff *mtk_tag_xmit(stru + struct dsa_port *dp = dsa_slave_to_port(dev); + u8 *mtk_tag; + bool is_vlan_skb = true; ++ unsigned char *dest = eth_hdr(skb)->h_dest; ++ bool is_multicast_skb = is_multicast_ether_addr(dest) && ++ !is_broadcast_ether_addr(dest); + + /* Build the special tag after the MAC Source Address. If VLAN header + * is present, it's required that VLAN header and special tag is +@@ -47,6 +51,10 @@ static struct sk_buff *mtk_tag_xmit(stru + MTK_HDR_XMIT_UNTAGGED; + mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; + ++ /* Disable SA learning for multicast frames */ ++ if (unlikely(is_multicast_skb)) ++ mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS; ++ + /* Tag control information is kept for 802.1Q */ + if (!is_vlan_skb) { + mtk_tag[2] = 0; +@@ -61,6 +69,9 @@ static struct sk_buff *mtk_tag_rcv(struc + { + int port; + __be16 *phdr, hdr; ++ unsigned char *dest = eth_hdr(skb)->h_dest; ++ bool is_multicast_skb = is_multicast_ether_addr(dest) && ++ !is_broadcast_ether_addr(dest); + + if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) + return NULL; +@@ -86,6 +97,10 @@ static struct sk_buff *mtk_tag_rcv(struc + if (!skb->dev) + return NULL; + ++ /* Only unicast or broadcast frames are offloaded */ ++ if (likely(!is_multicast_skb)) ++ skb->offload_fwd_mark = 1; ++ + return skb; + } + diff --git a/queue-5.6/net-ethernet-ti-cpsw-fix-assert_rtnl-warning-during-suspend.patch b/queue-5.6/net-ethernet-ti-cpsw-fix-assert_rtnl-warning-during-suspend.patch new file mode 100644 index 00000000000..debac949525 --- /dev/null +++ b/queue-5.6/net-ethernet-ti-cpsw-fix-assert_rtnl-warning-during-suspend.patch @@ -0,0 +1,47 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Grygorii Strashko +Date: Fri, 22 May 2020 20:09:28 +0300 +Subject: net: ethernet: ti: cpsw: fix ASSERT_RTNL() warning during suspend + +From: Grygorii 
Strashko + +[ Upstream commit 4c64b83d03f4aafcdf710caad994cbc855802e74 ] + +vlan_for_each() are required to be called with rtnl_lock taken, otherwise +ASSERT_RTNL() warning will be triggered - which happens now during System +resume from suspend: + cpsw_suspend() + |- cpsw_ndo_stop() + |- __hw_addr_ref_unsync_dev() + |- cpsw_purge_all_mc() + |- vlan_for_each() + |- ASSERT_RTNL(); + +Hence, fix it by surrounding cpsw_ndo_stop() by rtnl_lock/unlock() calls. + +Fixes: 15180eca569b ("net: ethernet: ti: cpsw: fix vlan mcast") +Signed-off-by: Grygorii Strashko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ti/cpsw.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/ethernet/ti/cpsw.c ++++ b/drivers/net/ethernet/ti/cpsw.c +@@ -1752,11 +1752,15 @@ static int cpsw_suspend(struct device *d + struct cpsw_common *cpsw = dev_get_drvdata(dev); + int i; + ++ rtnl_lock(); ++ + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].ndev) + if (netif_running(cpsw->slaves[i].ndev)) + cpsw_ndo_stop(cpsw->slaves[i].ndev); + ++ rtnl_unlock(); ++ + /* Select sleep pin state */ + pinctrl_pm_select_sleep_state(dev); + diff --git a/queue-5.6/net-inet_csk-fix-so_reuseport-bind-address-cache-in-tb-fast.patch b/queue-5.6/net-inet_csk-fix-so_reuseport-bind-address-cache-in-tb-fast.patch new file mode 100644 index 00000000000..611190b98da --- /dev/null +++ b/queue-5.6/net-inet_csk-fix-so_reuseport-bind-address-cache-in-tb-fast.patch @@ -0,0 +1,163 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Martin KaFai Lau +Date: Mon, 18 May 2020 17:13:34 -0700 +Subject: net: inet_csk: Fix so_reuseport bind-address cache in tb->fast* + +From: Martin KaFai Lau + +[ Upstream commit 88d7fcfa3b1fe670f0412b95be785aafca63352b ] + +The commit 637bc8bbe6c0 ("inet: reset tb->fastreuseport when adding a reuseport sk") +added a bind-address cache in tb->fast*. 
The tb->fast* caches the address +of a sk which has successfully been binded with SO_REUSEPORT ON. The idea +is to avoid the expensive conflict search in inet_csk_bind_conflict(). + +There is an issue with wildcard matching where sk_reuseport_match() should +have returned false but it is currently returning true. It ends up +hiding bind conflict. For example, + +bind("[::1]:443"); /* without SO_REUSEPORT. Succeed. */ +bind("[::2]:443"); /* with SO_REUSEPORT. Succeed. */ +bind("[::]:443"); /* with SO_REUSEPORT. Still Succeed where it shouldn't */ + +The last bind("[::]:443") with SO_REUSEPORT on should have failed because +it should have a conflict with the very first bind("[::1]:443") which +has SO_REUSEPORT off. However, the address "[::2]" is cached in +tb->fast* in the second bind. In the last bind, the sk_reuseport_match() +returns true because the binding sk's wildcard addr "[::]" matches with +the "[::2]" cached in tb->fast*. + +The correct bind conflict is reported by removing the second +bind such that tb->fast* cache is not involved and forces the +bind("[::]:443") to go through the inet_csk_bind_conflict(): + +bind("[::1]:443"); /* without SO_REUSEPORT. Succeed. */ +bind("[::]:443"); /* with SO_REUSEPORT. -EADDRINUSE */ + +The expected behavior for sk_reuseport_match() is, it should only allow +the "cached" tb->fast* address to be used as a wildcard match but not +the address of the binding sk. To do that, the current +"bool match_wildcard" arg is split into +"bool match_sk1_wildcard" and "bool match_sk2_wildcard". + +This change only affects the sk_reuseport_match() which is only +used by inet_csk (e.g. TCP). +The other use cases are calling inet_rcv_saddr_equal() and +this patch makes it pass the same "match_wildcard" arg twice to +the "ipv[46]_rcv_saddr_equal(..., match_wildcard, match_wildcard)". 
+ +Cc: Josef Bacik +Fixes: 637bc8bbe6c0 ("inet: reset tb->fastreuseport when adding a reuseport sk") +Signed-off-by: Martin KaFai Lau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_connection_sock.c | 43 ++++++++++++++++++++++------------------ + 1 file changed, 24 insertions(+), 19 deletions(-) + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -24,17 +24,19 @@ + #include + + #if IS_ENABLED(CONFIG_IPV6) +-/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 +- * only, and any IPv4 addresses if not IPv6 only +- * match_wildcard == false: addresses must be exactly the same, i.e. +- * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, +- * and 0.0.0.0 equals to 0.0.0.0 only ++/* match_sk*_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses ++ * if IPv6 only, and any IPv4 addresses ++ * if not IPv6 only ++ * match_sk*_wildcard == false: addresses must be exactly the same, i.e. ++ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, ++ * and 0.0.0.0 equals to 0.0.0.0 only + */ + static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, + const struct in6_addr *sk2_rcv_saddr6, + __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, + bool sk1_ipv6only, bool sk2_ipv6only, +- bool match_wildcard) ++ bool match_sk1_wildcard, ++ bool match_sk2_wildcard) + { + int addr_type = ipv6_addr_type(sk1_rcv_saddr6); + int addr_type2 = sk2_rcv_saddr6 ? 
ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; +@@ -44,8 +46,8 @@ static bool ipv6_rcv_saddr_equal(const s + if (!sk2_ipv6only) { + if (sk1_rcv_saddr == sk2_rcv_saddr) + return true; +- if (!sk1_rcv_saddr || !sk2_rcv_saddr) +- return match_wildcard; ++ return (match_sk1_wildcard && !sk1_rcv_saddr) || ++ (match_sk2_wildcard && !sk2_rcv_saddr); + } + return false; + } +@@ -53,11 +55,11 @@ static bool ipv6_rcv_saddr_equal(const s + if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) + return true; + +- if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && ++ if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard && + !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) + return true; + +- if (addr_type == IPV6_ADDR_ANY && match_wildcard && ++ if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard && + !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) + return true; + +@@ -69,18 +71,19 @@ static bool ipv6_rcv_saddr_equal(const s + } + #endif + +-/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses +- * match_wildcard == false: addresses must be exactly the same, i.e. +- * 0.0.0.0 only equals to 0.0.0.0 ++/* match_sk*_wildcard == true: 0.0.0.0 equals to any IPv4 addresses ++ * match_sk*_wildcard == false: addresses must be exactly the same, i.e. 
++ * 0.0.0.0 only equals to 0.0.0.0 + */ + static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, +- bool sk2_ipv6only, bool match_wildcard) ++ bool sk2_ipv6only, bool match_sk1_wildcard, ++ bool match_sk2_wildcard) + { + if (!sk2_ipv6only) { + if (sk1_rcv_saddr == sk2_rcv_saddr) + return true; +- if (!sk1_rcv_saddr || !sk2_rcv_saddr) +- return match_wildcard; ++ return (match_sk1_wildcard && !sk1_rcv_saddr) || ++ (match_sk2_wildcard && !sk2_rcv_saddr); + } + return false; + } +@@ -96,10 +99,12 @@ bool inet_rcv_saddr_equal(const struct s + sk2->sk_rcv_saddr, + ipv6_only_sock(sk), + ipv6_only_sock(sk2), ++ match_wildcard, + match_wildcard); + #endif + return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr, +- ipv6_only_sock(sk2), match_wildcard); ++ ipv6_only_sock(sk2), match_wildcard, ++ match_wildcard); + } + EXPORT_SYMBOL(inet_rcv_saddr_equal); + +@@ -273,10 +278,10 @@ static inline int sk_reuseport_match(str + tb->fast_rcv_saddr, + sk->sk_rcv_saddr, + tb->fast_ipv6_only, +- ipv6_only_sock(sk), true); ++ ipv6_only_sock(sk), true, false); + #endif + return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr, +- ipv6_only_sock(sk), true); ++ ipv6_only_sock(sk), true, false); + } + + /* Obtain a reference to a local port for the given sock, diff --git a/queue-5.6/net-ipip-fix-wrong-address-family-in-init-error-path.patch b/queue-5.6/net-ipip-fix-wrong-address-family-in-init-error-path.patch new file mode 100644 index 00000000000..50c175cc810 --- /dev/null +++ b/queue-5.6/net-ipip-fix-wrong-address-family-in-init-error-path.patch @@ -0,0 +1,31 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Vadim Fedorenko +Date: Wed, 20 May 2020 11:50:48 +0300 +Subject: net: ipip: fix wrong address family in init error path + +From: Vadim Fedorenko + +[ Upstream commit 57ebc8f08504f176eb0f25b3e0fde517dec61a4f ] + +In case of error with MPLS support the code is misusing AF_INET +instead of AF_MPLS. 
+ +Fixes: 1b69e7e6c4da ("ipip: support MPLS over IPv4") +Signed-off-by: Vadim Fedorenko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ipip.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/ipip.c ++++ b/net/ipv4/ipip.c +@@ -698,7 +698,7 @@ out: + + rtnl_link_failed: + #if IS_ENABLED(CONFIG_MPLS) +- xfrm4_tunnel_deregister(&mplsip_handler, AF_INET); ++ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS); + xfrm_tunnel_mplsip_failed: + + #endif diff --git a/queue-5.6/net-mlx5-add-command-entry-handling-completion.patch b/queue-5.6/net-mlx5-add-command-entry-handling-completion.patch new file mode 100644 index 00000000000..1d3520e5623 --- /dev/null +++ b/queue-5.6/net-mlx5-add-command-entry-handling-completion.patch @@ -0,0 +1,96 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Moshe Shemesh +Date: Fri, 27 Dec 2019 07:01:53 +0200 +Subject: net/mlx5: Add command entry handling completion + +From: Moshe Shemesh + +[ Upstream commit 17d00e839d3b592da9659c1977d45f85b77f986a ] + +When FW response to commands is very slow and all command entries in +use are waiting for completion we can have a race where commands can get +timeout before they get out of the queue and handled. Timeout +completion on uninitialized command will cause releasing command's +buffers before accessing it for initialization and then we will get NULL +pointer exception while trying access it. It may also cause releasing +buffers of another command since we may have timeout completion before +even allocating entry index for this command. +Add entry handling completion to avoid this race. 
+ +Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") +Signed-off-by: Moshe Shemesh +Signed-off-by: Eran Ben Elisha +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 14 ++++++++++++++ + include/linux/mlx5/driver.h | 1 + + 2 files changed, 15 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -861,6 +861,7 @@ static void cmd_work_handler(struct work + int alloc_ret; + int cmd_mode; + ++ complete(&ent->handling); + sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; + down(sem); + if (!ent->page_queue) { +@@ -978,6 +979,11 @@ static int wait_func(struct mlx5_core_de + struct mlx5_cmd *cmd = &dev->cmd; + int err; + ++ if (!wait_for_completion_timeout(&ent->handling, timeout) && ++ cancel_work_sync(&ent->work)) { ++ ent->ret = -ECANCELED; ++ goto out_err; ++ } + if (cmd->mode == CMD_MODE_POLLING || ent->polling) { + wait_for_completion(&ent->done); + } else if (!wait_for_completion_timeout(&ent->done, timeout)) { +@@ -985,12 +991,17 @@ static int wait_func(struct mlx5_core_de + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + } + ++out_err: + err = ent->ret; + + if (err == -ETIMEDOUT) { + mlx5_core_warn(dev, "%s(0x%x) timeout. 
Will cause a leak of a command resource\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); ++ } else if (err == -ECANCELED) { ++ mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n", ++ mlx5_command_str(msg_to_opcode(ent->in)), ++ msg_to_opcode(ent->in)); + } + mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", + err, deliv_status_to_str(ent->status), ent->status); +@@ -1026,6 +1037,7 @@ static int mlx5_cmd_invoke(struct mlx5_c + ent->token = token; + ent->polling = force_polling; + ++ init_completion(&ent->handling); + if (!callback) + init_completion(&ent->done); + +@@ -1045,6 +1057,8 @@ static int mlx5_cmd_invoke(struct mlx5_c + err = wait_func(dev, ent); + if (err == -ETIMEDOUT) + goto out; ++ if (err == -ECANCELED) ++ goto out_free; + + ds = ent->ts2 - ent->ts1; + op = MLX5_GET(mbox_in, in->first.data, opcode); +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -761,6 +761,7 @@ struct mlx5_cmd_work_ent { + struct delayed_work cb_timeout_work; + void *context; + int idx; ++ struct completion handling; + struct completion done; + struct mlx5_cmd *cmd; + struct work_struct work; diff --git a/queue-5.6/net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch b/queue-5.6/net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch new file mode 100644 index 00000000000..f26ad7a2d4e --- /dev/null +++ b/queue-5.6/net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch @@ -0,0 +1,164 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Eran Ben Elisha +Date: Wed, 18 Mar 2020 21:44:32 +0200 +Subject: net/mlx5: Fix a race when moving command interface to events mode + +From: Eran Ben Elisha + +[ Upstream commit d43b7007dbd1195a5b6b83213e49b1516aaf6f5e ] + +After driver creates (via FW command) an EQ for commands, the driver will +be informed on new commands completion by EQE. 
However, due to a race in +driver's internal command mode metadata update, some new commands will +still be miss-handled by driver as if we are in polling mode. Such commands +can get two non forced completion, leading to already freed command entry +access. + +CREATE_EQ command, that maps EQ to the command queue must be posted to the +command queue while it is empty and no other command should be posted. + +Add SW mechanism that once the CREATE_EQ command is about to be executed, +all other commands will return error without being sent to the FW. Allow +sending other commands only after successfully changing the driver's +internal command mode metadata. +We can safely return error to all other commands while creating the command +EQ, as all other commands might be sent from the user/application during +driver load. Application can rerun them later after driver's load was +finished. + +Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") +Signed-off-by: Eran Ben Elisha +Signed-off-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 35 +++++++++++++++++++++++--- + drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 ++ + include/linux/mlx5/driver.h | 6 ++++ + 3 files changed, 40 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -848,6 +848,14 @@ static void free_msg(struct mlx5_core_de + static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg); + ++static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) ++{ ++ if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL) ++ return true; ++ ++ return cmd->allowed_opcode == opcode; ++} ++ + static void cmd_work_handler(struct work_struct *work) + { + struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); +@@ -914,7 +922,8 @@ static void cmd_work_handler(struct work + + /* 
Skip sending command to fw if internal error */ + if (pci_channel_offline(dev->pdev) || +- dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { ++ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || ++ !opcode_allowed(&dev->cmd, ent->op)) { + u8 status = 0; + u32 drv_synd; + +@@ -1405,6 +1414,22 @@ static void create_debugfs_files(struct + mlx5_cmdif_debugfs_init(dev); + } + ++void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode) ++{ ++ struct mlx5_cmd *cmd = &dev->cmd; ++ int i; ++ ++ for (i = 0; i < cmd->max_reg_cmds; i++) ++ down(&cmd->sem); ++ down(&cmd->pages_sem); ++ ++ cmd->allowed_opcode = opcode; ++ ++ up(&cmd->pages_sem); ++ for (i = 0; i < cmd->max_reg_cmds; i++) ++ up(&cmd->sem); ++} ++ + static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) + { + struct mlx5_cmd *cmd = &dev->cmd; +@@ -1681,12 +1706,13 @@ static int cmd_exec(struct mlx5_core_dev + int err; + u8 status = 0; + u32 drv_synd; ++ u16 opcode; + u8 token; + ++ opcode = MLX5_GET(mbox_in, in, opcode); + if (pci_channel_offline(dev->pdev) || +- dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { +- u16 opcode = MLX5_GET(mbox_in, in, opcode); +- ++ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || ++ !opcode_allowed(&dev->cmd, opcode)) { + err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); + MLX5_SET(mbox_out, out, status, status); + MLX5_SET(mbox_out, out, syndrome, drv_synd); +@@ -1988,6 +2014,7 @@ int mlx5_cmd_init(struct mlx5_core_dev * + mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma)); + + cmd->mode = CMD_MODE_POLLING; ++ cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL; + + create_msg_cache(dev); + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c +@@ -611,11 +611,13 @@ static int create_async_eqs(struct mlx5_ + .nent = MLX5_NUM_CMD_EQE, + .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, + }; ++ mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ); + err = setup_async_eq(dev, 
&table->cmd_eq, &param, "cmd"); + if (err) + goto err1; + + mlx5_cmd_use_events(dev); ++ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); + + param = (struct mlx5_eq_param) { + .irq_index = 0, +@@ -645,6 +647,7 @@ err2: + mlx5_cmd_use_polling(dev); + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); + err1: ++ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + return err; + } +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -301,6 +301,7 @@ struct mlx5_cmd { + struct semaphore sem; + struct semaphore pages_sem; + int mode; ++ u16 allowed_opcode; + struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; + struct dma_pool *pool; + struct mlx5_cmd_debug dbg; +@@ -893,10 +894,15 @@ mlx5_frag_buf_get_idx_last_contig_stride + return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1); + } + ++enum { ++ CMD_ALLOWED_OPCODE_ALL, ++}; ++ + int mlx5_cmd_init(struct mlx5_core_dev *dev); + void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); + void mlx5_cmd_use_events(struct mlx5_core_dev *dev); + void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); ++void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); + + struct mlx5_async_ctx { + struct mlx5_core_dev *dev; diff --git a/queue-5.6/net-mlx5-fix-cleaning-unmanaged-flow-tables.patch b/queue-5.6/net-mlx5-fix-cleaning-unmanaged-flow-tables.patch new file mode 100644 index 00000000000..3fa297275e5 --- /dev/null +++ b/queue-5.6/net-mlx5-fix-cleaning-unmanaged-flow-tables.patch @@ -0,0 +1,54 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Roi Dayan +Date: Mon, 11 May 2020 16:32:09 +0300 +Subject: net/mlx5: Fix cleaning unmanaged flow tables + +From: Roi Dayan + +[ Upstream commit aee37f3d940ca732df71c3df49347bccaafc0b24 ] + +Unmanaged flow tables doesn't have a parent and tree_put_node() +assume there is always a parent if cleaning is needed. fix that.
+ +Fixes: 5281a0c90919 ("net/mlx5: fs_core: Introduce unmanaged flow tables") +Signed-off-by: Roi Dayan +Reviewed-by: Mark Bloch +Reviewed-by: Paul Blakey +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +@@ -323,14 +323,13 @@ static void tree_put_node(struct fs_node + if (node->del_hw_func) + node->del_hw_func(node); + if (parent_node) { +- /* Only root namespace doesn't have parent and we just +- * need to free its node. +- */ + down_write_ref_node(parent_node, locked); + list_del_init(&node->list); + if (node->del_sw_func) + node->del_sw_func(node); + up_write_ref_node(parent_node, locked); ++ } else if (node->del_sw_func) { ++ node->del_sw_func(node); + } else { + kfree(node); + } +@@ -447,8 +446,10 @@ static void del_sw_flow_table(struct fs_ + fs_get_obj(ft, node); + + rhltable_destroy(&ft->fgs_hash); +- fs_get_obj(prio, ft->node.parent); +- prio->num_ft--; ++ if (ft->node.parent) { ++ fs_get_obj(prio, ft->node.parent); ++ prio->num_ft--; ++ } + kfree(ft); + } + diff --git a/queue-5.6/net-mlx5-fix-error-flow-in-case-of-function_setup-failure.patch b/queue-5.6/net-mlx5-fix-error-flow-in-case-of-function_setup-failure.patch new file mode 100644 index 00000000000..b72c35ea27e --- /dev/null +++ b/queue-5.6/net-mlx5-fix-error-flow-in-case-of-function_setup-failure.patch @@ -0,0 +1,41 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Shay Drory +Date: Wed, 6 May 2020 14:52:04 +0300 +Subject: net/mlx5: Fix error flow in case of function_setup failure + +From: Shay Drory + +[ Upstream commit 4f7400d5cbaef676e00cdffb0565bf731c6bb09e ] + +Currently, if an error occurred during mlx5_function_setup(), we +keep dev->state as DEVICE_STATE_UP. +Fixing it by adding a goto label. 
+ +Fixes: e161105e58da ("net/mlx5: Function setup/teardown procedures") +Signed-off-by: Shay Drory +Reviewed-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1179,7 +1179,7 @@ int mlx5_load_one(struct mlx5_core_dev * + + err = mlx5_function_setup(dev, boot); + if (err) +- goto out; ++ goto err_function; + + if (boot) { + err = mlx5_init_once(dev); +@@ -1225,6 +1225,7 @@ err_load: + mlx5_cleanup_once(dev); + function_teardown: + mlx5_function_teardown(dev, boot); ++err_function: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mutex_unlock(&dev->intf_state_mutex); + diff --git a/queue-5.6/net-mlx5-fix-memory-leak-in-mlx5_events_init.patch b/queue-5.6/net-mlx5-fix-memory-leak-in-mlx5_events_init.patch new file mode 100644 index 00000000000..3c224efaa8b --- /dev/null +++ b/queue-5.6/net-mlx5-fix-memory-leak-in-mlx5_events_init.patch @@ -0,0 +1,36 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Moshe Shemesh +Date: Wed, 29 Apr 2020 23:56:58 +0300 +Subject: net/mlx5: Fix memory leak in mlx5_events_init + +From: Moshe Shemesh + +[ Upstream commit df14ad1eccb04a4a28c90389214dbacab085b244 ] + +Fix memory leak in mlx5_events_init(), in case +create_single_thread_workqueue() fails, events +struct should be freed. 
+ +Fixes: 5d3c537f9070 ("net/mlx5: Handle event of power detection in the PCIE slot") +Signed-off-by: Moshe Shemesh +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/events.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/events.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c +@@ -346,8 +346,10 @@ int mlx5_events_init(struct mlx5_core_de + events->dev = dev; + dev->priv.events = events; + events->wq = create_singlethread_workqueue("mlx5_events"); +- if (!events->wq) ++ if (!events->wq) { ++ kfree(events); + return -ENOMEM; ++ } + INIT_WORK(&events->pcie_core_work, mlx5_pcie_event); + + return 0; diff --git a/queue-5.6/net-mlx5e-fix-inner-tirs-handling.patch b/queue-5.6/net-mlx5e-fix-inner-tirs-handling.patch new file mode 100644 index 00000000000..33de16871bf --- /dev/null +++ b/queue-5.6/net-mlx5e-fix-inner-tirs-handling.patch @@ -0,0 +1,129 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Roi Dayan +Date: Thu, 30 Apr 2020 09:16:01 +0300 +Subject: net/mlx5e: Fix inner tirs handling + +From: Roi Dayan + +[ Upstream commit a16b8e0dcf7043bee46174bed0553cc9e36b63a5 ] + +In the cited commit inner_tirs argument was added to create and destroy +inner tirs, and no indication was added to mlx5e_modify_tirs_hash() +function. In order to have a consistent handling, use +inner_indir_tir[0].tirn in tirs destroy/modify function as an indication +to whether inner tirs are created. +Inner tirs are not created for representors and before this commit, +a call to mlx5e_modify_tirs_hash() was sending HW commands to +modify non-existent inner tirs. 
+ +Fixes: 46dc933cee82 ("net/mlx5e: Provide explicit directive if to create inner indirect tirs") +Signed-off-by: Roi Dayan +Reviewed-by: Vlad Buslov +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 +++++++----- + drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 ++-- + drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 ++-- + 4 files changed, 12 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -1104,7 +1104,7 @@ void mlx5e_close_drop_rq(struct mlx5e_rq + int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); + + int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); +-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); ++void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); + + int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); + void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -2747,7 +2747,8 @@ void mlx5e_modify_tirs_hash(struct mlx5e + mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); + } + +- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) ++ /* Verify inner tirs resources allocated */ ++ if (!priv->inner_indir_tir[0].tirn) + return; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { +@@ -3394,14 +3395,15 @@ out: + return err; + } + +-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) ++void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) + { + int i; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); + +- if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) ++ /* Verify inner tirs resources allocated */ 
++ if (!priv->inner_indir_tir[0].tirn) + return; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) +@@ -5107,7 +5109,7 @@ err_destroy_xsk_rqts: + err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + err_destroy_indirect_tirs: +- mlx5e_destroy_indirect_tirs(priv, true); ++ mlx5e_destroy_indirect_tirs(priv); + err_destroy_direct_rqts: + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + err_destroy_indirect_rqts: +@@ -5126,7 +5128,7 @@ static void mlx5e_cleanup_nic_rx(struct + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); +- mlx5e_destroy_indirect_tirs(priv, true); ++ mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_close_drop_rq(&priv->drop_rq); +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +@@ -1667,7 +1667,7 @@ err_destroy_ttc_table: + err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + err_destroy_indirect_tirs: +- mlx5e_destroy_indirect_tirs(priv, false); ++ mlx5e_destroy_indirect_tirs(priv); + err_destroy_direct_rqts: + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + err_destroy_indirect_rqts: +@@ -1684,7 +1684,7 @@ static void mlx5e_cleanup_rep_rx(struct + mlx5_del_flow_rules(rpriv->vport_rx_rule); + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); +- mlx5e_destroy_indirect_tirs(priv, false); ++ mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_close_drop_rq(&priv->drop_rq); +--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +@@ -396,7 +396,7 @@ static int mlx5i_init_rx(struct mlx5e_pr + err_destroy_direct_tirs: + 
mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + err_destroy_indirect_tirs: +- mlx5e_destroy_indirect_tirs(priv, true); ++ mlx5e_destroy_indirect_tirs(priv); + err_destroy_direct_rqts: + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + err_destroy_indirect_rqts: +@@ -412,7 +412,7 @@ static void mlx5i_cleanup_rx(struct mlx5 + { + mlx5i_destroy_flow_steering(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); +- mlx5e_destroy_indirect_tirs(priv, true); ++ mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_close_drop_rq(&priv->drop_rq); diff --git a/queue-5.6/net-mlx5e-ktls-destroy-key-object-after-destroying-the-tis.patch b/queue-5.6/net-mlx5e-ktls-destroy-key-object-after-destroying-the-tis.patch new file mode 100644 index 00000000000..f52c5454bf0 --- /dev/null +++ b/queue-5.6/net-mlx5e-ktls-destroy-key-object-after-destroying-the-tis.patch @@ -0,0 +1,36 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Tariq Toukan +Date: Mon, 27 Apr 2020 16:56:59 +0300 +Subject: net/mlx5e: kTLS, Destroy key object after destroying the TIS + +From: Tariq Toukan + +[ Upstream commit 16736e11f43b80a38f98f6add54fab3b8c297df3 ] + +The TLS TIS object contains the dek/key ID. +By destroying the key first, the TIS would contain an invalid +non-existing key ID. +Reverse the destroy order, this also achieves the desired asymmetry +between the destroy and the create flows.
+ +Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") +Signed-off-by: Tariq Toukan +Reviewed-by: Boris Pismenny +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +@@ -69,8 +69,8 @@ static void mlx5e_ktls_del(struct net_de + struct mlx5e_ktls_offload_context_tx *tx_priv = + mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + +- mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id); + mlx5e_destroy_tis(priv->mdev, tx_priv->tisn); ++ mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id); + kvfree(tx_priv); + } + diff --git a/queue-5.6/net-mlx5e-update-netdev-txq-on-completions-during-closure.patch b/queue-5.6/net-mlx5e-update-netdev-txq-on-completions-during-closure.patch new file mode 100644 index 00000000000..f80fcbe809c --- /dev/null +++ b/queue-5.6/net-mlx5e-update-netdev-txq-on-completions-during-closure.patch @@ -0,0 +1,54 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Moshe Shemesh +Date: Tue, 7 Apr 2020 17:38:28 +0300 +Subject: net/mlx5e: Update netdev txq on completions during closure + +From: Moshe Shemesh + +[ Upstream commit 5e911e2c06bd8c17df29147a5e2d4b17fafda024 ] + +On sq closure when we free its descriptors, we should also update netdev +txq on completions which would not arrive. Otherwise if we reopen sqs +and attach them back, for example on fw fatal recovery flow, we may get +tx timeout. 
+ +Fixes: 29429f3300a3 ("net/mlx5e: Timeout if SQ doesn't flush during close") +Signed-off-by: Moshe Shemesh +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +@@ -538,10 +538,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *c + void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) + { + struct mlx5e_tx_wqe_info *wi; ++ u32 dma_fifo_cc, nbytes = 0; ++ u16 ci, sqcc, npkts = 0; + struct sk_buff *skb; +- u32 dma_fifo_cc; +- u16 sqcc; +- u16 ci; + int i; + + sqcc = sq->cc; +@@ -566,11 +565,15 @@ void mlx5e_free_txqsq_descs(struct mlx5e + } + + dev_kfree_skb_any(skb); ++ npkts++; ++ nbytes += wi->num_bytes; + sqcc += wi->num_wqebbs; + } + + sq->dma_fifo_cc = dma_fifo_cc; + sq->cc = sqcc; ++ ++ netdev_tx_completed_queue(sq->txq, npkts, nbytes); + } + + #ifdef CONFIG_MLX5_CORE_IPOIB diff --git a/queue-5.6/net-mvpp2-fix-rx-hashing-for-non-10g-ports.patch b/queue-5.6/net-mvpp2-fix-rx-hashing-for-non-10g-ports.patch new file mode 100644 index 00000000000..eb691662f2f --- /dev/null +++ b/queue-5.6/net-mvpp2-fix-rx-hashing-for-non-10g-ports.patch @@ -0,0 +1,187 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Russell King +Date: Wed, 20 May 2020 12:26:35 +0100 +Subject: net: mvpp2: fix RX hashing for non-10G ports + +From: Russell King + +[ Upstream commit 3138a07ce219acde4c0d7ea0b6d54ba64153328b ] + +When rxhash is enabled on any ethernet port except the first in each CP +block, traffic flow is prevented. The analysis is below: + +I've been investigating this afternoon, and what I've found, comparing +a kernel without 895586d5dc32 and with 895586d5dc32 applied is: + +- The table programmed into the hardware via mvpp22_rss_fill_table() + appears to be identical with or without the commit. 
+ +- When rxhash is enabled on eth2, mvpp2_rss_port_c2_enable() reports + that c2.attr[0] and c2.attr[2] are written back containing: + + - with 895586d5dc32, failing: 00200000 40000000 + - without 895586d5dc32, working: 04000000 40000000 + +- When disabling rxhash, c2.attr[0] and c2.attr[2] are written back as: + + 04000000 00000000 + +The second value represents the MVPP22_CLS_C2_ATTR2_RSS_EN bit, the +first value is the queue number, which comprises two fields. The high +5 bits are 24:29 and the low three are 21:23 inclusive. This comes +from: + + c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) | + MVPP22_CLS_C2_ATTR0_QLOW(ql); + +So, the working case gives eth2 a queue id of 4.0, or 32 as per +port->first_rxq, and the non-working case a queue id of 0.1, or 1. +The allocation of queue IDs seems to be in mvpp2_port_probe(): + + if (priv->hw_version == MVPP21) + port->first_rxq = port->id * port->nrxqs; + else + port->first_rxq = port->id * priv->max_port_rxqs; + +Where: + + if (priv->hw_version == MVPP21) + priv->max_port_rxqs = 8; + else + priv->max_port_rxqs = 32; + +Making the port 0 (eth0 / eth1) have port->first_rxq = 0, and port 1 +(eth2) be 32. It seems the idea is that the first 32 queues belong to +port 0, the second 32 queues belong to port 1, etc. + +mvpp2_rss_port_c2_enable() gets the queue number from its parameter, +'ctx', which comes from mvpp22_rss_ctx(port, 0). This returns +port->rss_ctx[0]. + +mvpp22_rss_context_create() is responsible for allocating that, which +it does by looking for an unallocated priv->rss_tables[] pointer. This +table is shared amongst all ports on the CP silicon. + +When we write the tables in mvpp22_rss_fill_table(), the RSS table +entry is defined by: + + u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) | + MVPP22_RSS_INDEX_TABLE_ENTRY(i); + +where rss_ctx is the context ID (queue number) and i is the index in +the table.
+ +If we look at what is written: + +- The first table to be written has "sel" values of 00000000..0000001f, + containing values 0..3. This appears to be for eth1. This is table 0, + RX queue number 0. +- The second table has "sel" values of 00000100..0000011f, and appears + to be for eth2. These contain values 0x20..0x23. This is table 1, + RX queue number 0. +- The third table has "sel" values of 00000200..0000021f, and appears + to be for eth3. These contain values 0x40..0x43. This is table 2, + RX queue number 0. + +How do queue numbers translate to the RSS table? There is another +table - the RXQ2RSS table, indexed by the MVPP22_RSS_INDEX_QUEUE field +of MVPP22_RSS_INDEX and accessed through the MVPP22_RXQ2RSS_TABLE +register. Before 895586d5dc32, it was: + + mvpp2_write(priv, MVPP22_RSS_INDEX, + MVPP22_RSS_INDEX_QUEUE(port->first_rxq)); + mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, + MVPP22_RSS_TABLE_POINTER(port->id)); + +and after: + + mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx)); + mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx)); + +Before the commit, for eth2, that would've contained '32' for the +index and '1' for the table pointer - mapping queue 32 to table 1. +Remember that this is queue-high.queue-low of 4.0. + +After the commit, we appear to map queue 1 to table 1. That again +looks fine on the face of it. + +Section 9.3.1 of the A8040 manual seems to indicate the reason that the +queue number is separated. queue-low seems to always come from the +classifier, whereas queue-high can be from the ingress physical port +number or the classifier depending on the MVPP2_CLS_SWFWD_PCTRL_REG. + +We set the port bit in MVPP2_CLS_SWFWD_PCTRL_REG, meaning that queue-high +comes from the MVPP2_CLS_SWFWD_P2HQ_REG() register... and this seems to +be where our bug comes from.
+ +mvpp2_cls_oversize_rxq_set() sets this up as: + + mvpp2_write(port->priv, MVPP2_CLS_SWFWD_P2HQ_REG(port->id), + (port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS)); + + val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG); + val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id); + mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val); + +Setting the MVPP2_CLS_SWFWD_PCTRL_MASK bit means that the queue-high +for eth2 is _always_ 4, so only queues 32 through 39 inclusive are +available to eth2. Yet, we're trying to tell the classifier to set +queue-high, which will be ignored, to zero. Hence, the queue-high +field (MVPP22_CLS_C2_ATTR0_QHIGH()) from the classifier will be +ignored. + +This means we end up directing traffic from eth2 not to queue 1, but +to queue 33, and then we tell it to look up queue 33 in the RSS table. +However, RSS table has not been programmed for queue 33, and so it ends +up (presumably) dropping the packets. + +It seems that mvpp22_rss_context_create() doesn't take account of the +fact that the upper 5 bits of the queue ID can't actually be changed +due to the settings in mvpp2_cls_oversize_rxq_set(), _or_ it seems that +mvpp2_cls_oversize_rxq_set() has been missed in this commit. Either +way, these two functions mutually disagree with what queue number +should be used. + +Looking deeper into what mvpp2_cls_oversize_rxq_set() and the MTU +validation is doing, it seems that MVPP2_CLS_SWFWD_P2HQ_REG() is used +for over-sized packets attempting to egress through this port. With +the classifier having had RSS enabled and directing eth2 traffic to +queue 1, we may still have packets appearing on queue 32 for this port. + +However, the only way we may end up with over-sized packets attempting +to egress through eth2 - is if the A8040 forwards frames between its +ports. From what I can see, we don't support that feature, and the +kernel restricts the egress packet size to the MTU. 
In any case, if we +were to attempt to transmit an oversized packet, we have no support in +the kernel to deal with that appearing in the port's receive queue. + +So, this patch attempts to solve the issue by clearing the +MVPP2_CLS_SWFWD_PCTRL_MASK() bit, allowing MVPP22_CLS_C2_ATTR0_QHIGH() +from the classifier to define the queue-high field of the queue number. + +My testing seems to confirm my findings above - clearing this bit +means that if I enable rxhash on eth2, the interface can then pass +traffic, as we are now directing traffic to RX queue 1 rather than +queue 33. Traffic still seems to work with rxhash off as well. + +Reported-by: Matteo Croce +Tested-by: Matteo Croce +Fixes: 895586d5dc32 ("net: mvpp2: cls: Use RSS contexts to handle RSS tables") +Signed-off-by: Russell King +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c ++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +@@ -1070,7 +1070,7 @@ void mvpp2_cls_oversize_rxq_set(struct m + (port->first_rxq >> MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS)); + + val = mvpp2_read(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG); +- val |= MVPP2_CLS_SWFWD_PCTRL_MASK(port->id); ++ val &= ~MVPP2_CLS_SWFWD_PCTRL_MASK(port->id); + mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val); + } + diff --git a/queue-5.6/net-nlmsg_cancel-if-put-fails-for-nhmsg.patch b/queue-5.6/net-nlmsg_cancel-if-put-fails-for-nhmsg.patch new file mode 100644 index 00000000000..eb8613efb48 --- /dev/null +++ b/queue-5.6/net-nlmsg_cancel-if-put-fails-for-nhmsg.patch @@ -0,0 +1,195 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Stephen Worley +Date: Tue, 19 May 2020 21:57:12 -0400 +Subject: net: nlmsg_cancel() if put fails for nhmsg + +From: Stephen Worley + +[ Upstream commit d69100b8eee27c2d60ee52df76e0b80a8d492d34 ] + +Fixes data remnant seen when we fail to 
reserve space for a +nexthop group during a larger dump. + +If we fail the reservation, we goto nla_put_failure and +cancel the message. + +Reproduce with the following iproute2 commands: +===================== +ip link add dummy1 type dummy +ip link add dummy2 type dummy +ip link add dummy3 type dummy +ip link add dummy4 type dummy +ip link add dummy5 type dummy +ip link add dummy6 type dummy +ip link add dummy7 type dummy +ip link add dummy8 type dummy +ip link add dummy9 type dummy +ip link add dummy10 type dummy +ip link add dummy11 type dummy +ip link add dummy12 type dummy +ip link add dummy13 type dummy +ip link add dummy14 type dummy +ip link add dummy15 type dummy +ip link add dummy16 type dummy +ip link add dummy17 type dummy +ip link add dummy18 type dummy +ip link add dummy19 type dummy +ip link add dummy20 type dummy +ip link add dummy21 type dummy +ip link add dummy22 type dummy +ip link add dummy23 type dummy +ip link add dummy24 type dummy +ip link add dummy25 type dummy +ip link add dummy26 type dummy +ip link add dummy27 type dummy +ip link add dummy28 type dummy +ip link add dummy29 type dummy +ip link add dummy30 type dummy +ip link add dummy31 type dummy +ip link add dummy32 type dummy + +ip link set dummy1 up +ip link set dummy2 up +ip link set dummy3 up +ip link set dummy4 up +ip link set dummy5 up +ip link set dummy6 up +ip link set dummy7 up +ip link set dummy8 up +ip link set dummy9 up +ip link set dummy10 up +ip link set dummy11 up +ip link set dummy12 up +ip link set dummy13 up +ip link set dummy14 up +ip link set dummy15 up +ip link set dummy16 up +ip link set dummy17 up +ip link set dummy18 up +ip link set dummy19 up +ip link set dummy20 up +ip link set dummy21 up +ip link set dummy22 up +ip link set dummy23 up +ip link set dummy24 up +ip link set dummy25 up +ip link set dummy26 up +ip link set dummy27 up +ip link set dummy28 up +ip link set dummy29 up +ip link set dummy30 up +ip link set dummy31 up +ip link set dummy32 up + +ip link 
set dummy33 up +ip link set dummy34 up + +ip link set vrf-red up +ip link set vrf-blue up + +ip link set dummyVRFred up +ip link set dummyVRFblue up + +ip ro add 1.1.1.1/32 dev dummy1 +ip ro add 1.1.1.2/32 dev dummy2 +ip ro add 1.1.1.3/32 dev dummy3 +ip ro add 1.1.1.4/32 dev dummy4 +ip ro add 1.1.1.5/32 dev dummy5 +ip ro add 1.1.1.6/32 dev dummy6 +ip ro add 1.1.1.7/32 dev dummy7 +ip ro add 1.1.1.8/32 dev dummy8 +ip ro add 1.1.1.9/32 dev dummy9 +ip ro add 1.1.1.10/32 dev dummy10 +ip ro add 1.1.1.11/32 dev dummy11 +ip ro add 1.1.1.12/32 dev dummy12 +ip ro add 1.1.1.13/32 dev dummy13 +ip ro add 1.1.1.14/32 dev dummy14 +ip ro add 1.1.1.15/32 dev dummy15 +ip ro add 1.1.1.16/32 dev dummy16 +ip ro add 1.1.1.17/32 dev dummy17 +ip ro add 1.1.1.18/32 dev dummy18 +ip ro add 1.1.1.19/32 dev dummy19 +ip ro add 1.1.1.20/32 dev dummy20 +ip ro add 1.1.1.21/32 dev dummy21 +ip ro add 1.1.1.22/32 dev dummy22 +ip ro add 1.1.1.23/32 dev dummy23 +ip ro add 1.1.1.24/32 dev dummy24 +ip ro add 1.1.1.25/32 dev dummy25 +ip ro add 1.1.1.26/32 dev dummy26 +ip ro add 1.1.1.27/32 dev dummy27 +ip ro add 1.1.1.28/32 dev dummy28 +ip ro add 1.1.1.29/32 dev dummy29 +ip ro add 1.1.1.30/32 dev dummy30 +ip ro add 1.1.1.31/32 dev dummy31 +ip ro add 1.1.1.32/32 dev dummy32 + +ip next add id 1 via 1.1.1.1 dev dummy1 +ip next add id 2 via 1.1.1.2 dev dummy2 +ip next add id 3 via 1.1.1.3 dev dummy3 +ip next add id 4 via 1.1.1.4 dev dummy4 +ip next add id 5 via 1.1.1.5 dev dummy5 +ip next add id 6 via 1.1.1.6 dev dummy6 +ip next add id 7 via 1.1.1.7 dev dummy7 +ip next add id 8 via 1.1.1.8 dev dummy8 +ip next add id 9 via 1.1.1.9 dev dummy9 +ip next add id 10 via 1.1.1.10 dev dummy10 +ip next add id 11 via 1.1.1.11 dev dummy11 +ip next add id 12 via 1.1.1.12 dev dummy12 +ip next add id 13 via 1.1.1.13 dev dummy13 +ip next add id 14 via 1.1.1.14 dev dummy14 +ip next add id 15 via 1.1.1.15 dev dummy15 +ip next add id 16 via 1.1.1.16 dev dummy16 +ip next add id 17 via 1.1.1.17 dev dummy17 +ip next add id 18 via 
1.1.1.18 dev dummy18 +ip next add id 19 via 1.1.1.19 dev dummy19 +ip next add id 20 via 1.1.1.20 dev dummy20 +ip next add id 21 via 1.1.1.21 dev dummy21 +ip next add id 22 via 1.1.1.22 dev dummy22 +ip next add id 23 via 1.1.1.23 dev dummy23 +ip next add id 24 via 1.1.1.24 dev dummy24 +ip next add id 25 via 1.1.1.25 dev dummy25 +ip next add id 26 via 1.1.1.26 dev dummy26 +ip next add id 27 via 1.1.1.27 dev dummy27 +ip next add id 28 via 1.1.1.28 dev dummy28 +ip next add id 29 via 1.1.1.29 dev dummy29 +ip next add id 30 via 1.1.1.30 dev dummy30 +ip next add id 31 via 1.1.1.31 dev dummy31 +ip next add id 32 via 1.1.1.32 dev dummy32 + +i=100 + +while [ $i -le 200 ] +do +ip next add id $i group 1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19 + + echo $i + + ((i++)) + +done + +ip next add id 999 group 1/2/3/4/5/6 + +ip next ls + +======================== + +Fixes: ab84be7e54fc ("net: Initial nexthop code") +Signed-off-by: Stephen Worley +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/nexthop.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/nexthop.c ++++ b/net/ipv4/nexthop.c +@@ -276,6 +276,7 @@ out: + return 0; + + nla_put_failure: ++ nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + diff --git a/queue-5.6/net-qrtr-fix-passing-invalid-reference-to-qrtr_local_enqueue.patch b/queue-5.6/net-qrtr-fix-passing-invalid-reference-to-qrtr_local_enqueue.patch new file mode 100644 index 00000000000..188217ba475 --- /dev/null +++ b/queue-5.6/net-qrtr-fix-passing-invalid-reference-to-qrtr_local_enqueue.patch @@ -0,0 +1,38 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Manivannan Sadhasivam +Date: Tue, 19 May 2020 23:44:16 +0530 +Subject: net: qrtr: Fix passing invalid reference to qrtr_local_enqueue() + +From: Manivannan Sadhasivam + +[ Upstream commit d28ea1fbbf437054ef339afec241019f2c4e2bb6 ] + +Once the traversal of the list is completed with list_for_each_entry(), +the iterator (node) will point to 
an invalid object. So passing this to +qrtr_local_enqueue() which is outside of the iterator block is erroneous +even though the object is not used. + +So fix this by passing NULL to qrtr_local_enqueue(). + +Fixes: bdabad3e363d ("net: Add Qualcomm IPC router") +Reported-by: kbuild test robot +Reported-by: Julia Lawall +Signed-off-by: Manivannan Sadhasivam +Reviewed-by: Bjorn Andersson +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/qrtr/qrtr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/qrtr/qrtr.c ++++ b/net/qrtr/qrtr.c +@@ -855,7 +855,7 @@ static int qrtr_bcast_enqueue(struct qrt + } + mutex_unlock(&qrtr_node_lock); + +- qrtr_local_enqueue(node, skb, type, from, to); ++ qrtr_local_enqueue(NULL, skb, type, from, to); + + return 0; + } diff --git a/queue-5.6/net-revert-net-get-rid-of-an-signed-integer-overflow-in-ip_idents_reserve.patch b/queue-5.6/net-revert-net-get-rid-of-an-signed-integer-overflow-in-ip_idents_reserve.patch new file mode 100644 index 00000000000..68ed46a2753 --- /dev/null +++ b/queue-5.6/net-revert-net-get-rid-of-an-signed-integer-overflow-in-ip_idents_reserve.patch @@ -0,0 +1,66 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Yuqi Jin +Date: Sat, 16 May 2020 11:46:49 +0800 +Subject: net: revert "net: get rid of an signed integer overflow in ip_idents_reserve()" + +From: Yuqi Jin + +[ Upstream commit a6211caa634da39d861a47437ffcda8b38ef421b ] + +Commit adb03115f459 ("net: get rid of an signed integer overflow in ip_idents_reserve()") +used atomic_cmpxchg to replace "atomic_add_return" inside the function +"ip_idents_reserve". The reason was to avoid UBSAN warning. +However, this change has caused a performance degradation and in GCC-8, +-fno-strict-overflow is now mapped to -fwrapv -fwrapv-pointer +and signed integer overflow is now undefined by default at all +optimization levels[1]. Moreover, it was a bug in UBSAN vs -fwrapv +/-fno-strict-overflow, so let's revert it safely.
+ +[1] https://gcc.gnu.org/gcc-8/changes.html + +Suggested-by: Peter Zijlstra +Suggested-by: Eric Dumazet +Cc: "David S. Miller" +Cc: Alexey Kuznetsov +Cc: Hideaki YOSHIFUJI +Cc: Jakub Kicinski +Cc: Jiri Pirko +Cc: Arvind Sankar +Cc: Peter Zijlstra +Cc: Eric Dumazet +Cc: Jiong Wang +Signed-off-by: Yuqi Jin +Signed-off-by: Shaokun Zhang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -491,18 +491,16 @@ u32 ip_idents_reserve(u32 hash, int segs + atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; + u32 old = READ_ONCE(*p_tstamp); + u32 now = (u32)jiffies; +- u32 new, delta = 0; ++ u32 delta = 0; + + if (old != now && cmpxchg(p_tstamp, old, now) == old) + delta = prandom_u32_max(now - old); + +- /* Do not use atomic_add_return() as it makes UBSAN unhappy */ +- do { +- old = (u32)atomic_read(p_id); +- new = old + delta + segs; +- } while (atomic_cmpxchg(p_id, old, new) != old); +- +- return new - segs; ++ /* If UBSAN reports an error there, please make sure your compiler ++ * supports -fno-strict-overflow before reporting it that was a bug ++ * in UBSAN, and it has been fixed in GCC-8. 
++ */ ++ return atomic_add_return(segs + delta, p_id) - segs; + } + EXPORT_SYMBOL(ip_idents_reserve); + diff --git a/queue-5.6/net-sched-fix-reporting-the-first-time-use-timestamp.patch b/queue-5.6/net-sched-fix-reporting-the-first-time-use-timestamp.patch new file mode 100644 index 00000000000..fffabbacbe8 --- /dev/null +++ b/queue-5.6/net-sched-fix-reporting-the-first-time-use-timestamp.patch @@ -0,0 +1,37 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Roman Mashak +Date: Sun, 17 May 2020 08:46:31 -0400 +Subject: net sched: fix reporting the first-time use timestamp + +From: Roman Mashak + +[ Upstream commit b15e62631c5f19fea9895f7632dae9c1b27fe0cd ] + +When a new action is installed, firstuse field of 'tcf_t' is explicitly set +to 0. Value of zero means "new action, not yet used"; as a packet hits the +action, 'firstuse' is stamped with the current jiffies value. + +tcf_tm_dump() should return 0 for firstuse if action has not yet been hit. + +Fixes: 48d8ee1694dd ("net sched actions: aggregate dumping of actions timeinfo") +Cc: Jamal Hadi Salim +Signed-off-by: Roman Mashak +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/act_api.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/include/net/act_api.h ++++ b/include/net/act_api.h +@@ -69,7 +69,8 @@ static inline void tcf_tm_dump(struct tc + { + dtm->install = jiffies_to_clock_t(jiffies - stm->install); + dtm->lastuse = jiffies_to_clock_t(jiffies - stm->lastuse); +- dtm->firstuse = jiffies_to_clock_t(jiffies - stm->firstuse); ++ dtm->firstuse = stm->firstuse ? 
++ jiffies_to_clock_t(jiffies - stm->firstuse) : 0; + dtm->expires = jiffies_to_clock_t(stm->expires); + } + diff --git a/queue-5.6/net-tls-fix-race-condition-causing-kernel-panic.patch b/queue-5.6/net-tls-fix-race-condition-causing-kernel-panic.patch new file mode 100644 index 00000000000..eac79bc92e7 --- /dev/null +++ b/queue-5.6/net-tls-fix-race-condition-causing-kernel-panic.patch @@ -0,0 +1,182 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Vinay Kumar Yadav +Date: Sat, 23 May 2020 01:40:31 +0530 +Subject: net/tls: fix race condition causing kernel panic + +From: Vinay Kumar Yadav + +[ Upstream commit 0cada33241d9de205522e3858b18e506ca5cce2c ] + +tls_sw_recvmsg() and tls_decrypt_done() can be run concurrently. +// tls_sw_recvmsg() + if (atomic_read(&ctx->decrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + else + reinit_completion(&ctx->async_wait.completion); + +//tls_decrypt_done() + pending = atomic_dec_return(&ctx->decrypt_pending); + + if (!pending && READ_ONCE(ctx->async_notify)) + complete(&ctx->async_wait.completion); + +Consider the scenario tls_decrypt_done() is about to run complete() + + if (!pending && READ_ONCE(ctx->async_notify)) + +and tls_sw_recvmsg() reads decrypt_pending == 0, does reinit_completion(), +then tls_decrypt_done() runs complete(). This sequence of execution +results in wrong completion. Consequently, for next decrypt request, +it will not wait for completion, eventually on connection close, crypto +resources freed, there is no way to handle pending decrypt response. + +This race condition can be avoided by having atomic_read() mutually +exclusive with atomic_dec_return(),complete(). Introduced a spin lock to +ensure the mutual exclusion. + +Addressed similar problem in tx direction. + +v1->v2: +- More readable commit message. +- Corrected the lock to fix new race scenario. +- Removed barrier which is not needed now.
+ +Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") +Signed-off-by: Vinay Kumar Yadav +Reviewed-by: Jakub Kicinski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tls.h | 4 ++++ + net/tls/tls_sw.c | 33 +++++++++++++++++++++++++++------ + 2 files changed, 31 insertions(+), 6 deletions(-) + +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -135,6 +135,8 @@ struct tls_sw_context_tx { + struct tls_rec *open_rec; + struct list_head tx_list; + atomic_t encrypt_pending; ++ /* protect crypto_wait with encrypt_pending */ ++ spinlock_t encrypt_compl_lock; + int async_notify; + u8 async_capable:1; + +@@ -155,6 +157,8 @@ struct tls_sw_context_rx { + u8 async_capable:1; + u8 decrypted:1; + atomic_t decrypt_pending; ++ /* protect crypto_wait with decrypt_pending*/ ++ spinlock_t decrypt_compl_lock; + bool async_notify; + }; + +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -206,10 +206,12 @@ static void tls_decrypt_done(struct cryp + + kfree(aead_req); + ++ spin_lock_bh(&ctx->decrypt_compl_lock); + pending = atomic_dec_return(&ctx->decrypt_pending); + +- if (!pending && READ_ONCE(ctx->async_notify)) ++ if (!pending && ctx->async_notify) + complete(&ctx->async_wait.completion); ++ spin_unlock_bh(&ctx->decrypt_compl_lock); + } + + static int tls_do_decryption(struct sock *sk, +@@ -467,10 +469,12 @@ static void tls_encrypt_done(struct cryp + ready = true; + } + ++ spin_lock_bh(&ctx->encrypt_compl_lock); + pending = atomic_dec_return(&ctx->encrypt_pending); + +- if (!pending && READ_ONCE(ctx->async_notify)) ++ if (!pending && ctx->async_notify) + complete(&ctx->async_wait.completion); ++ spin_unlock_bh(&ctx->encrypt_compl_lock); + + if (!ready) + return; +@@ -926,6 +930,7 @@ int tls_sw_sendmsg(struct sock *sk, stru + int num_zc = 0; + int orig_size; + int ret = 0; ++ int pending; + + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) + return -EOPNOTSUPP; +@@ -1092,13 +1097,19 @@ 
trim_sgl: + goto send_end; + } else if (num_zc) { + /* Wait for pending encryptions to get completed */ +- smp_store_mb(ctx->async_notify, true); ++ spin_lock_bh(&ctx->encrypt_compl_lock); ++ ctx->async_notify = true; + +- if (atomic_read(&ctx->encrypt_pending)) ++ pending = atomic_read(&ctx->encrypt_pending); ++ spin_unlock_bh(&ctx->encrypt_compl_lock); ++ if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + else + reinit_completion(&ctx->async_wait.completion); + ++ /* There can be no concurrent accesses, since we have no ++ * pending encrypt operations ++ */ + WRITE_ONCE(ctx->async_notify, false); + + if (ctx->async_wait.err) { +@@ -1729,6 +1740,7 @@ int tls_sw_recvmsg(struct sock *sk, + bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); + bool is_peek = flags & MSG_PEEK; + int num_async = 0; ++ int pending; + + flags |= nonblock; + +@@ -1891,8 +1903,11 @@ pick_next_record: + recv_end: + if (num_async) { + /* Wait for all previously submitted records to be decrypted */ +- smp_store_mb(ctx->async_notify, true); +- if (atomic_read(&ctx->decrypt_pending)) { ++ spin_lock_bh(&ctx->decrypt_compl_lock); ++ ctx->async_notify = true; ++ pending = atomic_read(&ctx->decrypt_pending); ++ spin_unlock_bh(&ctx->decrypt_compl_lock); ++ if (pending) { + err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + if (err) { + /* one of async decrypt failed */ +@@ -1904,6 +1919,10 @@ recv_end: + } else { + reinit_completion(&ctx->async_wait.completion); + } ++ ++ /* There can be no concurrent accesses, since we have no ++ * pending decrypt operations ++ */ + WRITE_ONCE(ctx->async_notify, false); + + /* Drain records from the rx_list & copy if required */ +@@ -2290,6 +2309,7 @@ int tls_set_sw_offload(struct sock *sk, + + if (tx) { + crypto_init_wait(&sw_ctx_tx->async_wait); ++ spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + crypto_info = &ctx->crypto_send.info; + cctx = &ctx->tx; + aead = &sw_ctx_tx->aead_send; +@@ -2298,6 +2318,7 @@ int tls_set_sw_offload(struct sock 
*sk, + sw_ctx_tx->tx_work.sk = sk; + } else { + crypto_init_wait(&sw_ctx_rx->async_wait); ++ spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + crypto_info = &ctx->crypto_recv.info; + cctx = &ctx->rx; + skb_queue_head_init(&sw_ctx_rx->rx_list); diff --git a/queue-5.6/nexthop-fix-attribute-checking-for-groups.patch b/queue-5.6/nexthop-fix-attribute-checking-for-groups.patch new file mode 100644 index 00000000000..2c862792349 --- /dev/null +++ b/queue-5.6/nexthop-fix-attribute-checking-for-groups.patch @@ -0,0 +1,35 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: David Ahern +Date: Sun, 17 May 2020 11:26:32 -0600 +Subject: nexthop: Fix attribute checking for groups + +From: David Ahern + +[ Upstream commit 84be69b869a5a496a6cfde9b3c29509207a1f1fa ] + +For nexthop groups, attributes after NHA_GROUP_TYPE are invalid, but +nh_check_attr_group starts checking at NHA_GROUP. The group type defaults +to multipath and the NHA_GROUP_TYPE is currently optional so this has +slipped through so far. Fix the attribute checking to handle support of +new group types. + +Fixes: 430a049190de ("nexthop: Add support for nexthop groups") +Signed-off-by: ASSOGBA Emery +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/nexthop.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/nexthop.c ++++ b/net/ipv4/nexthop.c +@@ -434,7 +434,7 @@ static int nh_check_attr_group(struct ne + if (!valid_group_nh(nh, len, extack)) + return -EINVAL; + } +- for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { ++ for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) { + if (!tb[i]) + continue; + diff --git a/queue-5.6/r8152-support-additional-microsoft-surface-ethernet-adapter-variant.patch b/queue-5.6/r8152-support-additional-microsoft-surface-ethernet-adapter-variant.patch new file mode 100644 index 00000000000..9f86fff93be --- /dev/null +++ b/queue-5.6/r8152-support-additional-microsoft-surface-ethernet-adapter-variant.patch @@ -0,0 +1,60 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Marc Payne +Date: Tue, 19 May 2020 19:01:46 +0100 +Subject: r8152: support additional Microsoft Surface Ethernet Adapter variant + +From: Marc Payne + +[ Upstream commit c27a204383616efba5a4194075e90819961ff66a ] + +Device id 0927 is the RTL8153B-based component of the 'Surface USB-C to +Ethernet and USB Adapter' and may be used as a component of other devices +in future. Tested and working with the r8152 driver. + +Update the cdc_ether blacklist due to the RTL8153 'network jam on suspend' +issue which this device will cause (personally confirmed). + +Signed-off-by: Marc Payne +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/cdc_ether.c | 11 +++++++++-- + drivers/net/usb/r8152.c | 1 + + 2 files changed, 10 insertions(+), 2 deletions(-) + +--- a/drivers/net/usb/cdc_ether.c ++++ b/drivers/net/usb/cdc_ether.c +@@ -815,14 +815,21 @@ static const struct usb_device_id produc + .driver_info = 0, + }, + +-/* Microsoft Surface 3 dock (based on Realtek RTL8153) */ ++/* Microsoft Surface Ethernet Adapter (based on Realtek RTL8153) */ + { + USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07c6, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, + }, + +- /* TP-LINK UE300 USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ ++/* Microsoft Surface Ethernet Adapter (based on Realtek RTL8153B) */ ++{ ++ USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x0927, USB_CLASS_COMM, ++ USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), ++ .driver_info = 0, ++}, ++ ++/* TP-LINK UE300 USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ + { + USB_DEVICE_AND_INTERFACE_INFO(TPLINK_VENDOR_ID, 0x0601, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -6901,6 +6901,7 @@ static const struct usb_device_id rtl815 + {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)}, + {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab)}, + {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6)}, ++ {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927)}, + {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062)}, diff --git a/queue-5.6/r8169-fix-ocp-access-on-rtl8117.patch b/queue-5.6/r8169-fix-ocp-access-on-rtl8117.patch new file mode 100644 index 00000000000..d40fcbd6357 --- /dev/null +++ b/queue-5.6/r8169-fix-ocp-access-on-rtl8117.patch @@ -0,0 +1,63 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Heiner Kallweit +Date: Thu, 21 May 2020 22:03:08 +0200 +Subject: 
r8169: fix OCP access on RTL8117 + +From: Heiner Kallweit + +[ Upstream commit 561535b0f23961ced071b82575d5e83e6351a814 ] + +According to r8168 vendor driver DASHv3 chips like RTL8168fp/RTL8117 +need a special addressing for OCP access. +Fix is compile-tested only due to missing test hardware. + +Fixes: 1287723aa139 ("r8169: add support for RTL8117") +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169_main.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -1044,6 +1044,13 @@ static u16 rtl_ephy_read(struct rtl8169_ + RTL_R32(tp, EPHYAR) & EPHYAR_DATA_MASK : ~0; + } + ++static void r8168fp_adjust_ocp_cmd(struct rtl8169_private *tp, u32 *cmd, int type) ++{ ++ /* based on RTL8168FP_OOBMAC_BASE in vendor driver */ ++ if (tp->mac_version == RTL_GIGA_MAC_VER_52 && type == ERIAR_OOB) ++ *cmd |= 0x7f0 << 18; ++} ++ + DECLARE_RTL_COND(rtl_eriar_cond) + { + return RTL_R32(tp, ERIAR) & ERIAR_FLAG; +@@ -1052,9 +1059,12 @@ DECLARE_RTL_COND(rtl_eriar_cond) + static void _rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask, + u32 val, int type) + { ++ u32 cmd = ERIAR_WRITE_CMD | type | mask | addr; ++ + BUG_ON((addr & 3) || (mask == 0)); + RTL_W32(tp, ERIDR, val); +- RTL_W32(tp, ERIAR, ERIAR_WRITE_CMD | type | mask | addr); ++ r8168fp_adjust_ocp_cmd(tp, &cmd, type); ++ RTL_W32(tp, ERIAR, cmd); + + rtl_udelay_loop_wait_low(tp, &rtl_eriar_cond, 100, 100); + } +@@ -1067,7 +1077,10 @@ static void rtl_eri_write(struct rtl8169 + + static u32 _rtl_eri_read(struct rtl8169_private *tp, int addr, int type) + { +- RTL_W32(tp, ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr); ++ u32 cmd = ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr; ++ ++ r8168fp_adjust_ocp_cmd(tp, &cmd, type); ++ RTL_W32(tp, ERIAR, cmd); + + return rtl_udelay_loop_wait_high(tp, 
&rtl_eriar_cond, 100, 100) ? + RTL_R32(tp, ERIDR) : ~0; diff --git a/queue-5.6/sctp-don-t-add-the-shutdown-timer-if-its-already-been-added.patch b/queue-5.6/sctp-don-t-add-the-shutdown-timer-if-its-already-been-added.patch new file mode 100644 index 00000000000..98d37e37958 --- /dev/null +++ b/queue-5.6/sctp-don-t-add-the-shutdown-timer-if-its-already-been-added.patch @@ -0,0 +1,81 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Neil Horman +Date: Tue, 19 May 2020 16:04:05 -0400 +Subject: sctp: Don't add the shutdown timer if its already been added + +From: Neil Horman + +[ Upstream commit 20a785aa52c82246055a089e55df9dac47d67da1 ] + +This BUG halt was reported a while back, but the patch somehow got +missed: + +PID: 2879 TASK: c16adaa0 CPU: 1 COMMAND: "sctpn" + #0 [f418dd28] crash_kexec at c04a7d8c + #1 [f418dd7c] oops_end at c0863e02 + #2 [f418dd90] do_invalid_op at c040aaca + #3 [f418de28] error_code (via invalid_op) at c08631a5 + EAX: f34baac0 EBX: 00000090 ECX: f418deb0 EDX: f5542950 EBP: 00000000 + DS: 007b ESI: f34ba800 ES: 007b EDI: f418dea0 GS: 00e0 + CS: 0060 EIP: c046fa5e ERR: ffffffff EFLAGS: 00010286 + #4 [f418de5c] add_timer at c046fa5e + #5 [f418de68] sctp_do_sm at f8db8c77 [sctp] + #6 [f418df30] sctp_primitive_SHUTDOWN at f8dcc1b5 [sctp] + #7 [f418df48] inet_shutdown at c080baf9 + #8 [f418df5c] sys_shutdown at c079eedf + #9 [f418df70] sys_socketcall at c079fe88 + EAX: ffffffda EBX: 0000000d ECX: bfceea90 EDX: 0937af98 + DS: 007b ESI: 0000000c ES: 007b EDI: b7150ae4 + SS: 007b ESP: bfceea7c EBP: bfceeaa8 GS: 0033 + CS: 0073 EIP: b775c424 ERR: 00000066 EFLAGS: 00000282 + +It appears that the side effect that starts the shutdown timer was processed +multiple times, which can happen as multiple paths can trigger it. This of +course leads to the BUG halt in add_timer getting called. + +Fix seems pretty straightforward, just check before the timer is added if its +already been started. 
If it has, mod the timer instead to min(current +expiration, new expiration) + +It's been tested but not confirmed to fix the problem, as the issue has only +occurred in production environments where test kernels are enjoined from being +installed. It appears to be a sane fix to me though. Also, recently, +Jere found a reproducer posted on list to confirm that this resolves the +issues + +Signed-off-by: Neil Horman +CC: Vlad Yasevich +CC: "David S. Miller" +CC: jere.leppanen@nokia.com +CC: marcelo.leitner@gmail.com +CC: netdev@vger.kernel.org +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sm_sideeffect.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/net/sctp/sm_sideeffect.c ++++ b/net/sctp/sm_sideeffect.c +@@ -1523,9 +1523,17 @@ static int sctp_cmd_interpreter(enum sct + timeout = asoc->timeouts[cmd->obj.to]; + BUG_ON(!timeout); + +- timer->expires = jiffies + timeout; +- sctp_association_hold(asoc); +- add_timer(timer); ++ /* ++ * SCTP has a hard time with timer starts. Because we process ++ * timer starts as side effects, it can be hard to tell if we ++ * have already started a timer or not, which leads to BUG ++ * halts when we call add_timer.
So here, instead of just starting ++ * a timer, if the timer is already started, and just mod ++ * the timer with the shorter of the two expiration times ++ */ ++ if (!timer_pending(timer)) ++ sctp_association_hold(asoc); ++ timer_reduce(timer, jiffies + timeout); + break; + + case SCTP_CMD_TIMER_RESTART: diff --git a/queue-5.6/sctp-start-shutdown-on-association-restart-if-in-shutdown-sent-state-and-socket-is-closed.patch b/queue-5.6/sctp-start-shutdown-on-association-restart-if-in-shutdown-sent-state-and-socket-is-closed.patch new file mode 100644 index 00000000000..2399899f36b --- /dev/null +++ b/queue-5.6/sctp-start-shutdown-on-association-restart-if-in-shutdown-sent-state-and-socket-is-closed.patch @@ -0,0 +1,69 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: "Jere Leppänen" +Date: Wed, 20 May 2020 18:15:31 +0300 +Subject: sctp: Start shutdown on association restart if in SHUTDOWN-SENT state and socket is closed + +From: "Jere Leppänen" + +[ Upstream commit d3e8e4c11870413789f029a71e72ae6e971fe678 ] + +Commit bdf6fa52f01b ("sctp: handle association restarts when the +socket is closed.") starts shutdown when an association is restarted, +if in SHUTDOWN-PENDING state and the socket is closed. However, the +rationale stated in that commit applies also when in SHUTDOWN-SENT +state - we don't want to move an association to ESTABLISHED state when +the socket has been closed, because that results in an association +that is unreachable from user space. + +The problem scenario: + +1. Client crashes and/or restarts. + +2. Server (using one-to-one socket) calls close(). SHUTDOWN is lost. + +3. Client reconnects using the same addresses and ports. + +4. Server's association is restarted. The association and the socket + move to ESTABLISHED state, even though the server process has + closed its descriptor. 
+ +Also, after step 4 when the server process exits, some resources are +leaked in an attempt to release the underlying inet sock structure in +ESTABLISHED state: + + IPv4: Attempt to release TCP socket in state 1 00000000377288c7 + +Fix by acting the same way as in SHUTDOWN-PENDING state. That is, if +an association is restarted in SHUTDOWN-SENT state and the socket is +closed, then start shutdown and don't move the association or the +socket to ESTABLISHED state. + +Fixes: bdf6fa52f01b ("sctp: handle association restarts when the socket is closed.") +Signed-off-by: Jere Leppänen +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sm_statefuns.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/net/sctp/sm_statefuns.c ++++ b/net/sctp/sm_statefuns.c +@@ -1856,12 +1856,13 @@ static enum sctp_disposition sctp_sf_do_ + /* Update the content of current association. */ + sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); +- if (sctp_state(asoc, SHUTDOWN_PENDING) && ++ if ((sctp_state(asoc, SHUTDOWN_PENDING) || ++ sctp_state(asoc, SHUTDOWN_SENT)) && + (sctp_sstate(asoc->base.sk, CLOSING) || + sock_flag(asoc->base.sk, SOCK_DEAD))) { +- /* if were currently in SHUTDOWN_PENDING, but the socket +- * has been closed by user, don't transition to ESTABLISHED. +- * Instead trigger SHUTDOWN bundled with COOKIE_ACK. ++ /* If the socket has been closed by user, don't ++ * transition to ESTABLISHED. Instead trigger SHUTDOWN ++ * bundled with COOKIE_ACK. 
+ */ + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + return sctp_sf_do_9_2_start_shutdown(net, ep, asoc, diff --git a/queue-5.6/series b/queue-5.6/series new file mode 100644 index 00000000000..bcf12efcbc2 --- /dev/null +++ b/queue-5.6/series @@ -0,0 +1,33 @@ +ax25-fix-setsockopt-so_bindtodevice.patch +dpaa_eth-fix-usage-as-dsa-master-try-3.patch +ethtool-count-header-size-in-reply-size-estimate.patch +felix-fix-initialization-of-ioremap-resources.patch +net-don-t-return-invalid-table-id-error-when-we-fall-back-to-pf_unspec.patch +net-dsa-mt7530-fix-roaming-from-dsa-user-ports.patch +net-ethernet-ti-cpsw-fix-assert_rtnl-warning-during-suspend.patch +__netif_receive_skb_core-pass-skb-by-reference.patch +net-inet_csk-fix-so_reuseport-bind-address-cache-in-tb-fast.patch +net-ipip-fix-wrong-address-family-in-init-error-path.patch +net-mlx5-add-command-entry-handling-completion.patch +net-mvpp2-fix-rx-hashing-for-non-10g-ports.patch +net-nlmsg_cancel-if-put-fails-for-nhmsg.patch +net-qrtr-fix-passing-invalid-reference-to-qrtr_local_enqueue.patch +net-revert-net-get-rid-of-an-signed-integer-overflow-in-ip_idents_reserve.patch +net-sched-fix-reporting-the-first-time-use-timestamp.patch +net-tls-fix-race-condition-causing-kernel-panic.patch +nexthop-fix-attribute-checking-for-groups.patch +r8152-support-additional-microsoft-surface-ethernet-adapter-variant.patch +sctp-don-t-add-the-shutdown-timer-if-its-already-been-added.patch +sctp-start-shutdown-on-association-restart-if-in-shutdown-sent-state-and-socket-is-closed.patch +tipc-block-bh-before-using-dst_cache.patch +net-mlx5e-ktls-destroy-key-object-after-destroying-the-tis.patch +net-mlx5e-fix-inner-tirs-handling.patch +net-mlx5-fix-memory-leak-in-mlx5_events_init.patch +net-mlx5e-update-netdev-txq-on-completions-during-closure.patch +net-mlx5-fix-error-flow-in-case-of-function_setup-failure.patch +wireguard-noise-read-preshared-key-while-taking-lock.patch 
+wireguard-queueing-preserve-flow-hash-across-packet-scrubbing.patch +wireguard-noise-separate-receive-counter-from-send-counter.patch +r8169-fix-ocp-access-on-rtl8117.patch +net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch +net-mlx5-fix-cleaning-unmanaged-flow-tables.patch diff --git a/queue-5.6/tipc-block-bh-before-using-dst_cache.patch b/queue-5.6/tipc-block-bh-before-using-dst_cache.patch new file mode 100644 index 00000000000..1b24f891169 --- /dev/null +++ b/queue-5.6/tipc-block-bh-before-using-dst_cache.patch @@ -0,0 +1,84 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: Eric Dumazet +Date: Thu, 21 May 2020 11:29:58 -0700 +Subject: tipc: block BH before using dst_cache + +From: Eric Dumazet + +[ Upstream commit 1378817486d6860f6a927f573491afe65287abf1 ] + +dst_cache_get() documents it must be used with BH disabled. + +syzbot reported: + +BUG: using smp_processor_id() in preemptible [00000000] code: /21697 +caller is dst_cache_get+0x3a/0xb0 net/core/dst_cache.c:68 +CPU: 0 PID: 21697 Comm: Not tainted 5.7.0-rc6-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x188/0x20d lib/dump_stack.c:118 + check_preemption_disabled lib/smp_processor_id.c:47 [inline] + debug_smp_processor_id.cold+0x88/0x9b lib/smp_processor_id.c:57 + dst_cache_get+0x3a/0xb0 net/core/dst_cache.c:68 + tipc_udp_xmit.isra.0+0xb9/0xad0 net/tipc/udp_media.c:164 + tipc_udp_send_msg+0x3e6/0x490 net/tipc/udp_media.c:244 + tipc_bearer_xmit_skb+0x1de/0x3f0 net/tipc/bearer.c:526 + tipc_enable_bearer+0xb2f/0xd60 net/tipc/bearer.c:331 + __tipc_nl_bearer_enable+0x2bf/0x390 net/tipc/bearer.c:995 + tipc_nl_bearer_enable+0x1e/0x30 net/tipc/bearer.c:1003 + genl_family_rcv_msg_doit net/netlink/genetlink.c:673 [inline] + genl_family_rcv_msg net/netlink/genetlink.c:718 [inline] + genl_rcv_msg+0x627/0xdf0 net/netlink/genetlink.c:735
netlink_rcv_skb+0x15a/0x410 net/netlink/af_netlink.c:2469 + genl_rcv+0x24/0x40 net/netlink/genetlink.c:746 + netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] + netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329 + netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918 + sock_sendmsg_nosec net/socket.c:652 [inline] + sock_sendmsg+0xcf/0x120 net/socket.c:672 + ____sys_sendmsg+0x6bf/0x7e0 net/socket.c:2362 + ___sys_sendmsg+0x100/0x170 net/socket.c:2416 + __sys_sendmsg+0xec/0x1b0 net/socket.c:2449 + do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295 + entry_SYSCALL_64_after_hwframe+0x49/0xb3 +RIP: 0033:0x45ca29 + +Fixes: e9c1a793210f ("tipc: add dst_cache support for udp media") +Cc: Xin Long +Cc: Jon Maloy +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/udp_media.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/net/tipc/udp_media.c ++++ b/net/tipc/udp_media.c +@@ -161,9 +161,11 @@ static int tipc_udp_xmit(struct net *net + struct udp_bearer *ub, struct udp_media_addr *src, + struct udp_media_addr *dst, struct dst_cache *cache) + { +- struct dst_entry *ndst = dst_cache_get(cache); ++ struct dst_entry *ndst; + int ttl, err = 0; + ++ local_bh_disable(); ++ ndst = dst_cache_get(cache); + if (dst->proto == htons(ETH_P_IP)) { + struct rtable *rt = (struct rtable *)ndst; + +@@ -210,9 +212,11 @@ static int tipc_udp_xmit(struct net *net + src->port, dst->port, false); + #endif + } ++ local_bh_enable(); + return err; + + tx_error: ++ local_bh_enable(); + kfree_skb(skb); + return err; + } diff --git a/queue-5.6/wireguard-noise-read-preshared-key-while-taking-lock.patch b/queue-5.6/wireguard-noise-read-preshared-key-while-taking-lock.patch new file mode 100644 index 00000000000..3b4b04e6fef --- /dev/null +++ b/queue-5.6/wireguard-noise-read-preshared-key-while-taking-lock.patch @@ -0,0 +1,63 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: 
"Jason A. Donenfeld" +Date: Tue, 19 May 2020 22:49:28 -0600 +Subject: wireguard: noise: read preshared key while taking lock + +From: "Jason A. Donenfeld" + +[ Upstream commit bc67d371256f5c47d824e2eec51e46c8d62d022e ] + +Prior we read the preshared key after dropping the handshake lock, which +isn't an actual crypto issue if it races, but it's still not quite +correct. So copy that part of the state into a temporary like we do with +the rest of the handshake state variables. Then we can release the lock, +operate on the temporary, and zero it out at the end of the function. In +performance tests, the impact of this was entirely unnoticable, probably +because those bytes are coming from the same cacheline as other things +that are being copied out in the same manner. + +Reported-by: Matt Dunwoodie +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") +Signed-off-by: Jason A. Donenfeld +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireguard/noise.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/wireguard/noise.c ++++ b/drivers/net/wireguard/noise.c +@@ -715,6 +715,7 @@ wg_noise_handshake_consume_response(stru + u8 e[NOISE_PUBLIC_KEY_LEN]; + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; + u8 static_private[NOISE_PUBLIC_KEY_LEN]; ++ u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; + + down_read(&wg->static_identity.lock); + +@@ -733,6 +734,8 @@ wg_noise_handshake_consume_response(stru + memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN); + memcpy(ephemeral_private, handshake->ephemeral_private, + NOISE_PUBLIC_KEY_LEN); ++ memcpy(preshared_key, handshake->preshared_key, ++ NOISE_SYMMETRIC_KEY_LEN); + up_read(&handshake->lock); + + if (state != HANDSHAKE_CREATED_INITIATION) +@@ -750,7 +753,7 @@ wg_noise_handshake_consume_response(stru + goto fail; + + /* psk */ +- mix_psk(chaining_key, hash, key, handshake->preshared_key); ++ mix_psk(chaining_key, hash, key, preshared_key); + + /* {} */ 
+ if (!message_decrypt(NULL, src->encrypted_nothing, +@@ -783,6 +786,7 @@ out: + memzero_explicit(chaining_key, NOISE_HASH_LEN); + memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); + memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN); ++ memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN); + up_read(&wg->static_identity.lock); + return ret_peer; + } diff --git a/queue-5.6/wireguard-noise-separate-receive-counter-from-send-counter.patch b/queue-5.6/wireguard-noise-separate-receive-counter-from-send-counter.patch new file mode 100644 index 00000000000..ad1fbd38b16 --- /dev/null +++ b/queue-5.6/wireguard-noise-separate-receive-counter-from-send-counter.patch @@ -0,0 +1,332 @@ +From foo@baz Wed 27 May 2020 07:13:24 PM CEST +From: "Jason A. Donenfeld" +Date: Tue, 19 May 2020 22:49:30 -0600 +Subject: wireguard: noise: separate receive counter from send counter + +From: "Jason A. Donenfeld" + +[ Upstream commit a9e90d9931f3a474f04bab782ccd9d77904941e9 ] + +In "wireguard: queueing: preserve flow hash across packet scrubbing", we +were required to slightly increase the size of the receive replay +counter to something still fairly small, but an increase nonetheless. +It turns out that we can recoup some of the additional memory overhead +by splitting up the prior union type into two distinct types. Before, we +used the same "noise_counter" union for both sending and receiving, with +sending just using a simple atomic64_t, while receiving used the full +replay counter checker. This meant that most of the memory being +allocated for the sending counter was being wasted. Since the old +"noise_counter" type increased in size in the prior commit, now is a +good time to split up that union type into a distinct "noise_replay_ +counter" for receiving and a boring atomic64_t for sending, each using +neither more nor less memory than required. 
+ +Also, since sometimes the replay counter is accessed without +necessitating additional accesses to the bitmap, we can reduce cache +misses by hoisting the always-necessary lock above the bitmap in the +struct layout. We also change a "noise_replay_counter" stack allocation +to kmalloc in a -DDEBUG selftest so that KASAN doesn't trigger a stack +frame warning. + +All in all, removing a bit of abstraction in this commit makes the code +simpler and smaller, in addition to the motivating memory usage +recuperation. For example, passing around raw "noise_symmetric_key" +structs is something that really only makes sense within noise.c, in the +one place where the sending and receiving keys can safely be thought of +as the same type of object; subsequent to that, it's important that we +uniformly access these through keypair->{sending,receiving}, where their +distinct roles are always made explicit. So this patch allows us to draw +that distinction clearly as well. + +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") +Signed-off-by: Jason A. Donenfeld +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireguard/noise.c | 16 ++--------- + drivers/net/wireguard/noise.h | 14 ++++------ + drivers/net/wireguard/receive.c | 42 +++++++++++++++---------------- + drivers/net/wireguard/selftest/counter.c | 17 ++++++++---- + drivers/net/wireguard/send.c | 12 +++----- + 5 files changed, 48 insertions(+), 53 deletions(-) + +--- a/drivers/net/wireguard/noise.c ++++ b/drivers/net/wireguard/noise.c +@@ -104,6 +104,7 @@ static struct noise_keypair *keypair_cre + + if (unlikely(!keypair)) + return NULL; ++ spin_lock_init(&keypair->receiving_counter.lock); + keypair->internal_id = atomic64_inc_return(&keypair_counter); + keypair->entry.type = INDEX_HASHTABLE_KEYPAIR; + keypair->entry.peer = peer; +@@ -358,25 +359,16 @@ out: + memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); + } + +-static void symmetric_key_init(struct noise_symmetric_key *key) +-{ +- spin_lock_init(&key->counter.receive.lock); +- atomic64_set(&key->counter.counter, 0); +- memset(key->counter.receive.backtrack, 0, +- sizeof(key->counter.receive.backtrack)); +- key->birthdate = ktime_get_coarse_boottime_ns(); +- key->is_valid = true; +-} +- + static void derive_keys(struct noise_symmetric_key *first_dst, + struct noise_symmetric_key *second_dst, + const u8 chaining_key[NOISE_HASH_LEN]) + { ++ u64 birthdate = ktime_get_coarse_boottime_ns(); + kdf(first_dst->key, second_dst->key, NULL, NULL, + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, + chaining_key); +- symmetric_key_init(first_dst); +- symmetric_key_init(second_dst); ++ first_dst->birthdate = second_dst->birthdate = birthdate; ++ first_dst->is_valid = second_dst->is_valid = true; + } + + static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], +--- a/drivers/net/wireguard/noise.h ++++ b/drivers/net/wireguard/noise.h +@@ -15,18 +15,14 @@ + #include + #include + +-union noise_counter { +- struct { +- u64 counter; +- unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; +- 
spinlock_t lock; +- } receive; +- atomic64_t counter; ++struct noise_replay_counter { ++ u64 counter; ++ spinlock_t lock; ++ unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; + }; + + struct noise_symmetric_key { + u8 key[NOISE_SYMMETRIC_KEY_LEN]; +- union noise_counter counter; + u64 birthdate; + bool is_valid; + }; +@@ -34,7 +30,9 @@ struct noise_symmetric_key { + struct noise_keypair { + struct index_hashtable_entry entry; + struct noise_symmetric_key sending; ++ atomic64_t sending_counter; + struct noise_symmetric_key receiving; ++ struct noise_replay_counter receiving_counter; + __le32 remote_index; + bool i_am_the_initiator; + struct kref refcount; +--- a/drivers/net/wireguard/receive.c ++++ b/drivers/net/wireguard/receive.c +@@ -246,20 +246,20 @@ static void keep_key_fresh(struct wg_pee + } + } + +-static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) ++static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) + { + struct scatterlist sg[MAX_SKB_FRAGS + 8]; + struct sk_buff *trailer; + unsigned int offset; + int num_frags; + +- if (unlikely(!key)) ++ if (unlikely(!keypair)) + return false; + +- if (unlikely(!READ_ONCE(key->is_valid) || +- wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) || +- key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { +- WRITE_ONCE(key->is_valid, false); ++ if (unlikely(!READ_ONCE(keypair->receiving.is_valid) || ++ wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) || ++ keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) { ++ WRITE_ONCE(keypair->receiving.is_valid, false); + return false; + } + +@@ -284,7 +284,7 @@ static bool decrypt_packet(struct sk_buf + + if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, + PACKET_CB(skb)->nonce, +- key->key)) ++ keypair->receiving.key)) + return false; + + /* Another ugly situation of pushing and pulling the header so as to +@@ -299,41 +299,41 @@ static bool 
decrypt_packet(struct sk_buf + } + + /* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ +-static bool counter_validate(union noise_counter *counter, u64 their_counter) ++static bool counter_validate(struct noise_replay_counter *counter, u64 their_counter) + { + unsigned long index, index_current, top, i; + bool ret = false; + +- spin_lock_bh(&counter->receive.lock); ++ spin_lock_bh(&counter->lock); + +- if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || ++ if (unlikely(counter->counter >= REJECT_AFTER_MESSAGES + 1 || + their_counter >= REJECT_AFTER_MESSAGES)) + goto out; + + ++their_counter; + + if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < +- counter->receive.counter)) ++ counter->counter)) + goto out; + + index = their_counter >> ilog2(BITS_PER_LONG); + +- if (likely(their_counter > counter->receive.counter)) { +- index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); ++ if (likely(their_counter > counter->counter)) { ++ index_current = counter->counter >> ilog2(BITS_PER_LONG); + top = min_t(unsigned long, index - index_current, + COUNTER_BITS_TOTAL / BITS_PER_LONG); + for (i = 1; i <= top; ++i) +- counter->receive.backtrack[(i + index_current) & ++ counter->backtrack[(i + index_current) & + ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; +- counter->receive.counter = their_counter; ++ counter->counter = their_counter; + } + + index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; + ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), +- &counter->receive.backtrack[index]); ++ &counter->backtrack[index]); + + out: +- spin_unlock_bh(&counter->receive.lock); ++ spin_unlock_bh(&counter->lock); + return ret; + } + +@@ -473,12 +473,12 @@ int wg_packet_rx_poll(struct napi_struct + if (unlikely(state != PACKET_STATE_CRYPTED)) + goto next; + +- if (unlikely(!counter_validate(&keypair->receiving.counter, ++ if (unlikely(!counter_validate(&keypair->receiving_counter, + PACKET_CB(skb)->nonce))) { + 
net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", + peer->device->dev->name, + PACKET_CB(skb)->nonce, +- keypair->receiving.counter.receive.counter); ++ keypair->receiving_counter.counter); + goto next; + } + +@@ -512,8 +512,8 @@ void wg_packet_decrypt_worker(struct wor + struct sk_buff *skb; + + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { +- enum packet_state state = likely(decrypt_packet(skb, +- &PACKET_CB(skb)->keypair->receiving)) ? ++ enum packet_state state = ++ likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ? + PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; + wg_queue_enqueue_per_peer_napi(skb, state); + if (need_resched()) +--- a/drivers/net/wireguard/selftest/counter.c ++++ b/drivers/net/wireguard/selftest/counter.c +@@ -6,18 +6,24 @@ + #ifdef DEBUG + bool __init wg_packet_counter_selftest(void) + { ++ struct noise_replay_counter *counter; + unsigned int test_num = 0, i; +- union noise_counter counter; + bool success = true; + +-#define T_INIT do { \ +- memset(&counter, 0, sizeof(union noise_counter)); \ +- spin_lock_init(&counter.receive.lock); \ ++ counter = kmalloc(sizeof(*counter), GFP_KERNEL); ++ if (unlikely(!counter)) { ++ pr_err("nonce counter self-test malloc: FAIL\n"); ++ return false; ++ } ++ ++#define T_INIT do { \ ++ memset(counter, 0, sizeof(*counter)); \ ++ spin_lock_init(&counter->lock); \ + } while (0) + #define T_LIM (COUNTER_WINDOW_SIZE + 1) + #define T(n, v) do { \ + ++test_num; \ +- if (counter_validate(&counter, n) != (v)) { \ ++ if (counter_validate(counter, n) != (v)) { \ + pr_err("nonce counter self-test %u: FAIL\n", \ + test_num); \ + success = false; \ +@@ -99,6 +105,7 @@ bool __init wg_packet_counter_selftest(v + + if (success) + pr_info("nonce counter self-tests: pass\n"); ++ kfree(counter); + return success; + } + #endif +--- a/drivers/net/wireguard/send.c ++++ b/drivers/net/wireguard/send.c +@@ -129,7 +129,7 @@ static void keep_key_fresh(struct wg_pee + rcu_read_lock_bh(); + keypair = 
rcu_dereference_bh(peer->keypairs.current_keypair); + if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && +- (unlikely(atomic64_read(&keypair->sending.counter.counter) > ++ (unlikely(atomic64_read(&keypair->sending_counter) > + REKEY_AFTER_MESSAGES) || + (keypair->i_am_the_initiator && + unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, +@@ -353,7 +353,6 @@ void wg_packet_purge_staged_packets(stru + + void wg_packet_send_staged_packets(struct wg_peer *peer) + { +- struct noise_symmetric_key *key; + struct noise_keypair *keypair; + struct sk_buff_head packets; + struct sk_buff *skb; +@@ -373,10 +372,9 @@ void wg_packet_send_staged_packets(struc + rcu_read_unlock_bh(); + if (unlikely(!keypair)) + goto out_nokey; +- key = &keypair->sending; +- if (unlikely(!READ_ONCE(key->is_valid))) ++ if (unlikely(!READ_ONCE(keypair->sending.is_valid))) + goto out_nokey; +- if (unlikely(wg_birthdate_has_expired(key->birthdate, ++ if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, + REJECT_AFTER_TIME))) + goto out_invalid; + +@@ -391,7 +389,7 @@ void wg_packet_send_staged_packets(struc + */ + PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb); + PACKET_CB(skb)->nonce = +- atomic64_inc_return(&key->counter.counter) - 1; ++ atomic64_inc_return(&keypair->sending_counter) - 1; + if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) + goto out_invalid; + } +@@ -403,7 +401,7 @@ void wg_packet_send_staged_packets(struc + return; + + out_invalid: +- WRITE_ONCE(key->is_valid, false); ++ WRITE_ONCE(keypair->sending.is_valid, false); + out_nokey: + wg_noise_keypair_put(keypair, false); + diff --git a/queue-5.6/wireguard-queueing-preserve-flow-hash-across-packet-scrubbing.patch b/queue-5.6/wireguard-queueing-preserve-flow-hash-across-packet-scrubbing.patch new file mode 100644 index 00000000000..d84ba4b69dd --- /dev/null +++ b/queue-5.6/wireguard-queueing-preserve-flow-hash-across-packet-scrubbing.patch @@ -0,0 +1,114 @@ +From foo@baz Wed 
27 May 2020 07:13:24 PM CEST +From: "Jason A. Donenfeld" +Date: Tue, 19 May 2020 22:49:29 -0600 +Subject: wireguard: queueing: preserve flow hash across packet scrubbing + +From: "Jason A. Donenfeld" + +[ Upstream commit c78a0b4a78839d572d8a80f6a62221c0d7843135 ] + +It's important that we clear most header fields during encapsulation and +decapsulation, because the packet is substantially changed, and we don't +want any info leak or logic bug due to an accidental correlation. But, +for encapsulation, it's wrong to clear skb->hash, since it's used by +fq_codel and flow dissection in general. Without it, classification does +not proceed as usual. This change might make it easier to estimate the +number of inner flows by examining clustering of out of order packets, +but this shouldn't open up anything that can't already be inferred +otherwise (e.g. syn packet size inference), and fq_codel can be disabled +anyway. + +Furthermore, it might be the case that the hash isn't used or queried at +all until after wireguard transmits the encrypted UDP packet, which +means skb->hash might still be zero at this point, and thus no hash +taken over the inner packet data. In order to address this situation, we +force a calculation of skb->hash before encrypting packet data. + +Of course this means that fq_codel might transmit packets slightly more +out of order than usual. Toke did some testing on beefy machines with +high quantities of parallel flows and found that increasing the +replay-attack counter to 8192 takes care of the most pathological cases +pretty well. + +Reported-by: Dave Taht +Reviewed-and-tested-by: Toke Høiland-Jørgensen +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") +Signed-off-by: Jason A. Donenfeld +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireguard/messages.h | 2 +- + drivers/net/wireguard/queueing.h | 10 +++++++++- + drivers/net/wireguard/receive.c | 2 +- + drivers/net/wireguard/send.c | 7 ++++++- + 4 files changed, 17 insertions(+), 4 deletions(-) + +--- a/drivers/net/wireguard/messages.h ++++ b/drivers/net/wireguard/messages.h +@@ -32,7 +32,7 @@ enum cookie_values { + }; + + enum counter_values { +- COUNTER_BITS_TOTAL = 2048, ++ COUNTER_BITS_TOTAL = 8192, + COUNTER_REDUNDANT_BITS = BITS_PER_LONG, + COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS + }; +--- a/drivers/net/wireguard/queueing.h ++++ b/drivers/net/wireguard/queueing.h +@@ -87,12 +87,20 @@ static inline bool wg_check_packet_proto + return real_protocol && skb->protocol == real_protocol; + } + +-static inline void wg_reset_packet(struct sk_buff *skb) ++static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating) + { ++ u8 l4_hash = skb->l4_hash; ++ u8 sw_hash = skb->sw_hash; ++ u32 hash = skb->hash; + skb_scrub_packet(skb, true); + memset(&skb->headers_start, 0, + offsetof(struct sk_buff, headers_end) - + offsetof(struct sk_buff, headers_start)); ++ if (encapsulating) { ++ skb->l4_hash = l4_hash; ++ skb->sw_hash = sw_hash; ++ skb->hash = hash; ++ } + skb->queue_mapping = 0; + skb->nohdr = 0; + skb->peeked = 0; +--- a/drivers/net/wireguard/receive.c ++++ b/drivers/net/wireguard/receive.c +@@ -485,7 +485,7 @@ int wg_packet_rx_poll(struct napi_struct + if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb))) + goto next; + +- wg_reset_packet(skb); ++ wg_reset_packet(skb, false); + wg_packet_consume_data_done(peer, skb, &endpoint); + free = false; + +--- a/drivers/net/wireguard/send.c ++++ b/drivers/net/wireguard/send.c +@@ -170,6 +170,11 @@ static bool encrypt_packet(struct sk_buf + struct sk_buff *trailer; + int num_frags; + ++ /* Force hash calculation before encryption so that flow analysis is ++ * consistent over the inner packet. 
++ */ ++ skb_get_hash(skb); ++ + /* Calculate lengths. */ + padding_len = calculate_skb_padding(skb); + trailer_len = padding_len + noise_encrypted_len(0); +@@ -298,7 +303,7 @@ void wg_packet_encrypt_worker(struct wor + skb_list_walk_safe(first, skb, next) { + if (likely(encrypt_packet(skb, + PACKET_CB(first)->keypair))) { +- wg_reset_packet(skb); ++ wg_reset_packet(skb, true); + } else { + state = PACKET_STATE_DEAD; + break; -- 2.47.3