From 3ab87d10cd9813f14d3f1b93426056553fd45ac3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 Nov 2019 15:33:06 +0100 Subject: [PATCH] 5.3-stable patches added patches: cxgb4-fix-panic-when-attaching-to-uld-fail.patch cxgb4-request-the-tx-cidx-updates-to-status-page.patch dccp-do-not-leak-jiffies-on-the-wire.patch erspan-fix-the-tun_info-options_len-check-for-erspan.patch inet-stop-leaking-jiffies-on-the-wire.patch ipv4-fix-ipskb_frag_pmtu-handling-with-fragmentation.patch ipv4-fix-route-update-on-metric-change.patch keys-fix-memory-leak-in-copy_net_ns.patch mlxsw-core-unpublish-devlink-parameters-during-reload.patch net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch net-add-skb_queue_empty_lockless.patch net-annotate-accesses-to-sk-sk_incoming_cpu.patch net-annotate-lockless-accesses-to-sk-sk_napi_id.patch net-bcmgenet-don-t-set-phydev-link-from-mac.patch net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch net-dsa-fix-switch-tree-list.patch net-ensure-correct-skb-tstamp-in-various-fragmenters.patch net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch net-fix-sk_page_frag-recursion-from-memory-reclaim.patch net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch net-hns3-fix-mis-counting-irq-vector-numbers-issue.patch net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch net-mlx5-fix-flow-counter-list-auto-bits-struct.patch net-mlx5-fix-rtable-reference-leak.patch net-mlx5e-determine-source-port-properly-for-vlan-push-action.patch net-mlx5e-fix-ethtool-self-test-link-speed.patch net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch net-mlx5e-initialize-on-stack-link-modes-bitmap.patch net-mlx5e-remove-incorrect-match-criteria-assignment-line.patch 
net-netem-correct-the-parent-s-backlog-when-corrupted-packet-was-dropped.patch net-netem-fix-error-path-for-corrupted-gso-frames.patch net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch net-phylink-fix-phylink_dbg-macro.patch net-reorder-struct-net-fields-to-avoid-false-sharing.patch net-rtnetlink-fix-a-typo-fbd-fdb.patch net-smc-fix-closing-of-fallback-smc-sockets.patch net-smc-fix-refcounting-for-non-blocking-connect.patch net-smc-keep-vlan_id-for-smc-r-in-smc_listen_work.patch net-usb-lan78xx-connect-phy-before-registering-mac.patch net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch net-use-skb_queue_empty_lockless-in-poll-handlers.patch net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch netns-fix-gfp-flags-in-rtnl_net_notifyid.patch r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch rxrpc-fix-handling-of-last-subpacket-of-jumbo-packet.patch selftests-fib_tests-add-more-tests-for-metric-update.patch selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch udp-fix-data-race-in-udp_set_dev_scratch.patch udp-use-skb_queue_empty_lockless.patch vxlan-check-tun_info-options_len-properly.patch --- ...fix-panic-when-attaching-to-uld-fail.patch | 92 ++++++ ...t-the-tx-cidx-updates-to-status-page.patch | 50 +++ ...dccp-do-not-leak-jiffies-on-the-wire.patch | 32 ++ ...un_info-options_len-check-for-erspan.patch | 51 ++++ ...net-stop-leaking-jiffies-on-the-wire.patch | 106 +++++++ ...rag_pmtu-handling-with-fragmentation.patch | 105 +++++++ ...v4-fix-route-update-on-metric-change.patch | 65 ++++ .../keys-fix-memory-leak-in-copy_net_ns.patch | 59 ++++ ...ish-devlink-parameters-during-reload.patch | 53 ++++ ...ation-in-__skb_wait_for_more_packets.patch | 79 +++++ .../net-add-skb_queue_empty_lockless.patch | 93 ++++++ ...otate-accesses-to-sk-sk_incoming_cpu.patch | 158 ++++++++++ 
...e-lockless-accesses-to-sk-sk_napi_id.patch | 98 ++++++ ...genet-don-t-set-phydev-link-from-mac.patch | 44 +++ ...net-reset-40nm-ephy-on-energy-detect.patch | 65 ++++ ...oft-reset-40nm-ephys-before-mac-init.patch | 264 ++++++++++++++++ ...ot-clear-existing-mirrored-port-mask.patch | 33 ++ ...-imp-setup-for-port-different-than-8.patch | 80 +++++ queue-5.3/net-dsa-fix-switch-tree-list.patch | 32 ++ ...ct-skb-tstamp-in-various-fragmenters.patch | 140 +++++++++ ...dma-coherency-issue-with-sw-checksum.patch | 73 +++++ ...e_frag-recursion-from-memory-reclaim.patch | 160 ++++++++++ ...tency-when-deal-with-high-throughput.patch | 76 +++++ ...is-counting-irq-vector-numbers-issue.patch | 235 ++++++++++++++ ...guaranteed-amount-of-counters-per-vf.patch | 94 ++++++ ...x-flow-counter-list-auto-bits-struct.patch | 37 +++ .../net-mlx5-fix-rtable-reference-leak.patch | 58 ++++ ...e-port-properly-for-vlan-push-action.patch | 66 ++++ ...x5e-fix-ethtool-self-test-link-speed.patch | 60 ++++ ...ssed-cqes-in-case-of-low-napi-budget.patch | 57 ++++ ...nitialize-on-stack-link-modes-bitmap.patch | 31 ++ ...rrect-match-criteria-assignment-line.patch | 33 ++ ...og-when-corrupted-packet-was-dropped.patch | 34 +++ ...-error-path-for-corrupted-gso-frames.patch | 69 +++++ ...7xxx-define-soft_reset-for-40nm-ephy.patch | 35 +++ .../net-phylink-fix-phylink_dbg-macro.patch | 49 +++ ...ct-net-fields-to-avoid-false-sharing.patch | 113 +++++++ .../net-rtnetlink-fix-a-typo-fbd-fdb.patch | 32 ++ ...-fix-closing-of-fallback-smc-sockets.patch | 52 ++++ ...refcounting-for-non-blocking-connect.patch | 45 +++ ...vlan_id-for-smc-r-in-smc_listen_work.patch | 37 +++ ...x-connect-phy-before-registering-mac.patch | 60 ++++ ...ts-before-calling-generic_handle_irq.patch | 92 ++++++ ...empty_lockless-in-busy-poll-contexts.patch | 81 +++++ ...ueue_empty_lockless-in-poll-handlers.patch | 248 +++++++++++++++ ...e-ethtool_wolinfo-in-ethtool_get_wol.patch | 36 +++ ...s-fix-gfp-flags-in-rtnl_net_notifyid.patch | 286 
++++++++++++++++++ ...for-lenovo-thinkpad-usb-c-dock-gen-2.patch | 48 +++ ...ix-wrong-phy-id-issue-with-rtl8168dp.patch | 39 +++ ...ng-of-last-subpacket-of-jumbo-packet.patch | 133 ++++++++ ...sts-add-more-tests-for-metric-update.patch | 51 ++++ ...dualstack-fix-uninitalized-parameter.patch | 44 +++ queue-5.3/series | 55 ++++ ...fix-data-race-in-udp_set_dev_scratch.patch | 102 +++++++ .../udp-use-skb_queue_empty_lockless.patch | 98 ++++++ ...-check-tun_info-options_len-properly.patch | 38 +++ 56 files changed, 4556 insertions(+) create mode 100644 queue-5.3/cxgb4-fix-panic-when-attaching-to-uld-fail.patch create mode 100644 queue-5.3/cxgb4-request-the-tx-cidx-updates-to-status-page.patch create mode 100644 queue-5.3/dccp-do-not-leak-jiffies-on-the-wire.patch create mode 100644 queue-5.3/erspan-fix-the-tun_info-options_len-check-for-erspan.patch create mode 100644 queue-5.3/inet-stop-leaking-jiffies-on-the-wire.patch create mode 100644 queue-5.3/ipv4-fix-ipskb_frag_pmtu-handling-with-fragmentation.patch create mode 100644 queue-5.3/ipv4-fix-route-update-on-metric-change.patch create mode 100644 queue-5.3/keys-fix-memory-leak-in-copy_net_ns.patch create mode 100644 queue-5.3/mlxsw-core-unpublish-devlink-parameters-during-reload.patch create mode 100644 queue-5.3/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch create mode 100644 queue-5.3/net-add-skb_queue_empty_lockless.patch create mode 100644 queue-5.3/net-annotate-accesses-to-sk-sk_incoming_cpu.patch create mode 100644 queue-5.3/net-annotate-lockless-accesses-to-sk-sk_napi_id.patch create mode 100644 queue-5.3/net-bcmgenet-don-t-set-phydev-link-from-mac.patch create mode 100644 queue-5.3/net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch create mode 100644 queue-5.3/net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch create mode 100644 queue-5.3/net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch create mode 100644 
queue-5.3/net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch create mode 100644 queue-5.3/net-dsa-fix-switch-tree-list.patch create mode 100644 queue-5.3/net-ensure-correct-skb-tstamp-in-various-fragmenters.patch create mode 100644 queue-5.3/net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch create mode 100644 queue-5.3/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch create mode 100644 queue-5.3/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch create mode 100644 queue-5.3/net-hns3-fix-mis-counting-irq-vector-numbers-issue.patch create mode 100644 queue-5.3/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch create mode 100644 queue-5.3/net-mlx5-fix-flow-counter-list-auto-bits-struct.patch create mode 100644 queue-5.3/net-mlx5-fix-rtable-reference-leak.patch create mode 100644 queue-5.3/net-mlx5e-determine-source-port-properly-for-vlan-push-action.patch create mode 100644 queue-5.3/net-mlx5e-fix-ethtool-self-test-link-speed.patch create mode 100644 queue-5.3/net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch create mode 100644 queue-5.3/net-mlx5e-initialize-on-stack-link-modes-bitmap.patch create mode 100644 queue-5.3/net-mlx5e-remove-incorrect-match-criteria-assignment-line.patch create mode 100644 queue-5.3/net-netem-correct-the-parent-s-backlog-when-corrupted-packet-was-dropped.patch create mode 100644 queue-5.3/net-netem-fix-error-path-for-corrupted-gso-frames.patch create mode 100644 queue-5.3/net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch create mode 100644 queue-5.3/net-phylink-fix-phylink_dbg-macro.patch create mode 100644 queue-5.3/net-reorder-struct-net-fields-to-avoid-false-sharing.patch create mode 100644 queue-5.3/net-rtnetlink-fix-a-typo-fbd-fdb.patch create mode 100644 queue-5.3/net-smc-fix-closing-of-fallback-smc-sockets.patch create mode 100644 queue-5.3/net-smc-fix-refcounting-for-non-blocking-connect.patch create mode 100644 
queue-5.3/net-smc-keep-vlan_id-for-smc-r-in-smc_listen_work.patch create mode 100644 queue-5.3/net-usb-lan78xx-connect-phy-before-registering-mac.patch create mode 100644 queue-5.3/net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch create mode 100644 queue-5.3/net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch create mode 100644 queue-5.3/net-use-skb_queue_empty_lockless-in-poll-handlers.patch create mode 100644 queue-5.3/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch create mode 100644 queue-5.3/netns-fix-gfp-flags-in-rtnl_net_notifyid.patch create mode 100644 queue-5.3/r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch create mode 100644 queue-5.3/r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch create mode 100644 queue-5.3/rxrpc-fix-handling-of-last-subpacket-of-jumbo-packet.patch create mode 100644 queue-5.3/selftests-fib_tests-add-more-tests-for-metric-update.patch create mode 100644 queue-5.3/selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch create mode 100644 queue-5.3/udp-fix-data-race-in-udp_set_dev_scratch.patch create mode 100644 queue-5.3/udp-use-skb_queue_empty_lockless.patch create mode 100644 queue-5.3/vxlan-check-tun_info-options_len-properly.patch diff --git a/queue-5.3/cxgb4-fix-panic-when-attaching-to-uld-fail.patch b/queue-5.3/cxgb4-fix-panic-when-attaching-to-uld-fail.patch new file mode 100644 index 00000000000..e791ebf46ce --- /dev/null +++ b/queue-5.3/cxgb4-fix-panic-when-attaching-to-uld-fail.patch @@ -0,0 +1,92 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Vishal Kulkarni +Date: Wed, 30 Oct 2019 20:17:57 +0530 +Subject: cxgb4: fix panic when attaching to ULD fail + +From: Vishal Kulkarni + +[ Upstream commit fc89cc358fb64e2429aeae0f37906126636507ec ] + +Release resources when attaching to ULD fail. 
Otherwise, data +mismatch is seen between LLD and ULD later on, which lead to +kernel panic when accessing resources that should not even +exist in the first place. + +Fixes: 94cdb8bb993a ("cxgb4: Add support for dynamic allocation of resources for ULD") +Signed-off-by: Shahjada Abul Husain +Signed-off-by: Vishal Kulkarni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 28 ++++++++++++++----------- + 1 file changed, 16 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +@@ -695,10 +695,10 @@ static void uld_init(struct adapter *ada + lld->write_cmpl_support = adap->params.write_cmpl_support; + } + +-static void uld_attach(struct adapter *adap, unsigned int uld) ++static int uld_attach(struct adapter *adap, unsigned int uld) + { +- void *handle; + struct cxgb4_lld_info lli; ++ void *handle; + + uld_init(adap, &lli); + uld_queue_init(adap, uld, &lli); +@@ -708,7 +708,7 @@ static void uld_attach(struct adapter *a + dev_warn(adap->pdev_dev, + "could not attach to the %s driver, error %ld\n", + adap->uld[uld].name, PTR_ERR(handle)); +- return; ++ return PTR_ERR(handle); + } + + adap->uld[uld].handle = handle; +@@ -716,22 +716,22 @@ static void uld_attach(struct adapter *a + + if (adap->flags & CXGB4_FULL_INIT_DONE) + adap->uld[uld].state_change(handle, CXGB4_STATE_UP); ++ ++ return 0; + } + +-/** +- * cxgb4_register_uld - register an upper-layer driver +- * @type: the ULD type +- * @p: the ULD methods ++/* cxgb4_register_uld - register an upper-layer driver ++ * @type: the ULD type ++ * @p: the ULD methods + * +- * Registers an upper-layer driver with this driver and notifies the ULD +- * about any presently available devices that support its type. Returns +- * %-EBUSY if a ULD of the same type is already registered. 
++ * Registers an upper-layer driver with this driver and notifies the ULD ++ * about any presently available devices that support its type. + */ + void cxgb4_register_uld(enum cxgb4_uld type, + const struct cxgb4_uld_info *p) + { +- int ret = 0; + struct adapter *adap; ++ int ret = 0; + + if (type >= CXGB4_ULD_MAX) + return; +@@ -763,8 +763,12 @@ void cxgb4_register_uld(enum cxgb4_uld t + if (ret) + goto free_irq; + adap->uld[type] = *p; +- uld_attach(adap, type); ++ ret = uld_attach(adap, type); ++ if (ret) ++ goto free_txq; + continue; ++free_txq: ++ release_sge_txq_uld(adap, type); + free_irq: + if (adap->flags & CXGB4_FULL_INIT_DONE) + quiesce_rx_uld(adap, type); diff --git a/queue-5.3/cxgb4-request-the-tx-cidx-updates-to-status-page.patch b/queue-5.3/cxgb4-request-the-tx-cidx-updates-to-status-page.patch new file mode 100644 index 00000000000..6ef8a9ec0b3 --- /dev/null +++ b/queue-5.3/cxgb4-request-the-tx-cidx-updates-to-status-page.patch @@ -0,0 +1,50 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Raju Rangoju +Date: Wed, 23 Oct 2019 23:03:55 +0530 +Subject: cxgb4: request the TX CIDX updates to status page + +From: Raju Rangoju + +[ Upstream commit 7c3bebc3d8688b84795c11848c314a2fbfe045e0 ] + +For adapters which support the SGE Doorbell Queue Timer facility, +we configured the Ethernet TX Queues to send CIDX Updates to the +Associated Ethernet RX Response Queue with CPL_SGE_EGR_UPDATE +messages to allow us to respond more quickly to the CIDX Updates. +But, this was adding load to PCIe Link RX bandwidth and, +potentially, resulting in higher CPU Interrupt load. + +This patch requests the HW to deliver the CIDX updates to the TX +queue status page rather than generating an ingress queue message +(as an interrupt). With this patch, the load on RX bandwidth is +reduced and a substantial improvement in BW is noticed at lower +IO sizes. 
+ +Fixes: d429005fdf2c ("cxgb4/cxgb4vf: Add support for SGE doorbell queue timer") +Signed-off-by: Raju Rangoju +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/chelsio/cxgb4/sge.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c +@@ -3791,15 +3791,11 @@ int t4_sge_alloc_eth_txq(struct adapter + * write the CIDX Updates into the Status Page at the end of the + * TX Queue. + */ +- c.autoequiqe_to_viid = htonl((dbqt +- ? FW_EQ_ETH_CMD_AUTOEQUIQE_F +- : FW_EQ_ETH_CMD_AUTOEQUEQE_F) | ++ c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F | + FW_EQ_ETH_CMD_VIID_V(pi->viid)); + + c.fetchszm_to_iqid = +- htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(dbqt +- ? HOSTFCMODE_INGRESS_QUEUE_X +- : HOSTFCMODE_STATUS_PAGE_X) | ++ htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid)); + diff --git a/queue-5.3/dccp-do-not-leak-jiffies-on-the-wire.patch b/queue-5.3/dccp-do-not-leak-jiffies-on-the-wire.patch new file mode 100644 index 00000000000..2ba28973085 --- /dev/null +++ b/queue-5.3/dccp-do-not-leak-jiffies-on-the-wire.patch @@ -0,0 +1,32 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Mon, 4 Nov 2019 07:57:55 -0800 +Subject: dccp: do not leak jiffies on the wire + +From: Eric Dumazet + +[ Upstream commit 3d1e5039f5f87a8731202ceca08764ee7cb010d3 ] + +For some reason I missed the case of DCCP passive +flows in my previous patch. + +Fixes: a904a0693c18 ("inet: stop leaking jiffies on the wire") +Signed-off-by: Eric Dumazet +Reported-by: Thiemo Nagel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv4.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -416,7 +416,7 @@ struct sock *dccp_v4_request_recv_sock(c + RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); + newinet->mc_index = inet_iif(skb); + newinet->mc_ttl = ip_hdr(skb)->ttl; +- newinet->inet_id = jiffies; ++ newinet->inet_id = prandom_u32(); + + if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) + goto put_and_exit; diff --git a/queue-5.3/erspan-fix-the-tun_info-options_len-check-for-erspan.patch b/queue-5.3/erspan-fix-the-tun_info-options_len-check-for-erspan.patch new file mode 100644 index 00000000000..2dcc9702d22 --- /dev/null +++ b/queue-5.3/erspan-fix-the-tun_info-options_len-check-for-erspan.patch @@ -0,0 +1,51 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Xin Long +Date: Mon, 28 Oct 2019 23:19:35 +0800 +Subject: erspan: fix the tun_info options_len check for erspan + +From: Xin Long + +[ Upstream commit 2eb8d6d2910cfe3dc67dc056f26f3dd9c63d47cd ] + +The check for !md doesn't really work for ip_tunnel_info_opts(info) which +only does info + 1. Also to avoid out-of-bounds access on info, it should +ensure options_len is not less than erspan_metadata in both erspan_xmit() +and ip6erspan_tunnel_xmit(). + +Fixes: 1a66a836da ("gre: add collect_md mode to ERSPAN tunnel") +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_gre.c | 4 ++-- + net/ipv6/ip6_gre.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -509,9 +509,9 @@ static void erspan_fb_xmit(struct sk_buf + key = &tun_info->key; + if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) + goto err_free_skb; +- md = ip_tunnel_info_opts(tun_info); +- if (!md) ++ if (tun_info->options_len < sizeof(*md)) + goto err_free_skb; ++ md = ip_tunnel_info_opts(tun_info); + + /* ERSPAN has fixed 8 byte GRE header */ + version = md->version; +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -980,9 +980,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit + dsfield = key->tos; + if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) + goto tx_err; +- md = ip_tunnel_info_opts(tun_info); +- if (!md) ++ if (tun_info->options_len < sizeof(*md)) + goto tx_err; ++ md = ip_tunnel_info_opts(tun_info); + + tun_id = tunnel_id_to_key32(key->tun_id); + if (md->version == 1) { diff --git a/queue-5.3/inet-stop-leaking-jiffies-on-the-wire.patch b/queue-5.3/inet-stop-leaking-jiffies-on-the-wire.patch new file mode 100644 index 00000000000..00aeda70010 --- /dev/null +++ b/queue-5.3/inet-stop-leaking-jiffies-on-the-wire.patch @@ -0,0 +1,106 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Fri, 1 Nov 2019 10:32:19 -0700 +Subject: inet: stop leaking jiffies on the wire + +From: Eric Dumazet + +[ Upstream commit a904a0693c189691eeee64f6c6b188bd7dc244e9 ] + +Historically linux tried to stick to RFC 791, 1122, 2003 +for IPv4 ID field generation. + +RFC 6864 made clear that no matter how hard we try, +we can not ensure unicity of IP ID within maximum +lifetime for all datagrams with a given source +address/destination address/protocol tuple. + +Linux uses a per socket inet generator (inet_id), initialized +at connection startup with a XOR of 'jiffies' and other +fields that appear clear on the wire. 
+ +Thiemo Nagel pointed that this strategy is a privacy +concern as this provides 16 bits of entropy to fingerprint +devices. + +Let's switch to a random starting point, this is just as +good as far as RFC 6864 is concerned and does not leak +anything critical. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Reported-by: Thiemo Nagel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/crypto/chelsio/chtls/chtls_cm.c | 2 +- + net/dccp/ipv4.c | 2 +- + net/ipv4/datagram.c | 2 +- + net/ipv4/tcp_ipv4.c | 4 ++-- + net/sctp/socket.c | 2 +- + 5 files changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/crypto/chelsio/chtls/chtls_cm.c ++++ b/drivers/crypto/chelsio/chtls/chtls_cm.c +@@ -1297,7 +1297,7 @@ static void make_established(struct sock + tp->write_seq = snd_isn; + tp->snd_nxt = snd_isn; + tp->snd_una = snd_isn; +- inet_sk(sk)->inet_id = tp->write_seq ^ jiffies; ++ inet_sk(sk)->inet_id = prandom_u32(); + assign_rxopt(sk, opt); + + if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10)) +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -117,7 +117,7 @@ int dccp_v4_connect(struct sock *sk, str + inet->inet_daddr, + inet->inet_sport, + inet->inet_dport); +- inet->inet_id = dp->dccps_iss ^ jiffies; ++ inet->inet_id = prandom_u32(); + + err = dccp_connect(sk); + rt = NULL; +--- a/net/ipv4/datagram.c ++++ b/net/ipv4/datagram.c +@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock * + reuseport_has_conns(sk, true); + sk->sk_state = TCP_ESTABLISHED; + sk_set_txhash(sk); +- inet->inet_id = jiffies; ++ inet->inet_id = prandom_u32(); + + sk_dst_set(sk, &rt->dst); + err = 0; +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -300,7 +300,7 @@ int tcp_v4_connect(struct sock *sk, stru + inet->inet_daddr); + } + +- inet->inet_id = tp->write_seq ^ jiffies; ++ inet->inet_id = prandom_u32(); + + if (tcp_fastopen_defer_connect(sk, &err)) + return err; +@@ -1443,7 +1443,7 @@ struct sock *tcp_v4_syn_recv_sock(const + 
inet_csk(newsk)->icsk_ext_hdr_len = 0; + if (inet_opt) + inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; +- newinet->inet_id = newtp->write_seq ^ jiffies; ++ newinet->inet_id = prandom_u32(); + + if (!dst) { + dst = inet_csk_route_child_sock(sk, newsk, req); +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -9159,7 +9159,7 @@ void sctp_copy_sock(struct sock *newsk, + newinet->inet_rcv_saddr = inet->inet_rcv_saddr; + newinet->inet_dport = htons(asoc->peer.port); + newinet->pmtudisc = inet->pmtudisc; +- newinet->inet_id = asoc->next_tsn ^ jiffies; ++ newinet->inet_id = prandom_u32(); + + newinet->uc_ttl = inet->uc_ttl; + newinet->mc_loop = 1; diff --git a/queue-5.3/ipv4-fix-ipskb_frag_pmtu-handling-with-fragmentation.patch b/queue-5.3/ipv4-fix-ipskb_frag_pmtu-handling-with-fragmentation.patch new file mode 100644 index 00000000000..897462e10d8 --- /dev/null +++ b/queue-5.3/ipv4-fix-ipskb_frag_pmtu-handling-with-fragmentation.patch @@ -0,0 +1,105 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Sat, 19 Oct 2019 09:26:37 -0700 +Subject: ipv4: fix IPSKB_FRAG_PMTU handling with fragmentation + +From: Eric Dumazet + +[ Upstream commit e7a409c3f46cb0dbc7bfd4f6f9421d53e92614a5 ] + +This patch removes the iph field from the state structure, which is not +properly initialized. Instead, add a new field to make the "do we want +to set DF" be the state bit and move the code to set the DF flag from +ip_frag_next(). + +Joint work with Pablo and Linus. + +Fixes: 19c3401a917b ("net: ipv4: place control buffer handling away from fragmentation iterators") +Reported-by: Patrick Schönthaler +Signed-off-by: Eric Dumazet +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Linus Torvalds +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip.h | 4 ++-- + net/bridge/netfilter/nf_conntrack_bridge.c | 2 +- + net/ipv4/ip_output.c | 11 ++++++----- + 3 files changed, 9 insertions(+), 8 deletions(-) + +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -184,7 +184,7 @@ static inline struct sk_buff *ip_fraglis + } + + struct ip_frag_state { +- struct iphdr *iph; ++ bool DF; + unsigned int hlen; + unsigned int ll_rs; + unsigned int mtu; +@@ -195,7 +195,7 @@ struct ip_frag_state { + }; + + void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs, +- unsigned int mtu, struct ip_frag_state *state); ++ unsigned int mtu, bool DF, struct ip_frag_state *state); + struct sk_buff *ip_frag_next(struct sk_buff *skb, + struct ip_frag_state *state); + +--- a/net/bridge/netfilter/nf_conntrack_bridge.c ++++ b/net/bridge/netfilter/nf_conntrack_bridge.c +@@ -94,7 +94,7 @@ slow_path: + * This may also be a clone skbuff, we could preserve the geometry for + * the copies but probably not worth the effort. + */ +- ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state); ++ ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state); + + while (state.left > 0) { + struct sk_buff *skb2; +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -645,11 +645,12 @@ void ip_fraglist_prepare(struct sk_buff + EXPORT_SYMBOL(ip_fraglist_prepare); + + void ip_frag_init(struct sk_buff *skb, unsigned int hlen, +- unsigned int ll_rs, unsigned int mtu, ++ unsigned int ll_rs, unsigned int mtu, bool DF, + struct ip_frag_state *state) + { + struct iphdr *iph = ip_hdr(skb); + ++ state->DF = DF; + state->hlen = hlen; + state->ll_rs = ll_rs; + state->mtu = mtu; +@@ -668,9 +669,6 @@ static void ip_frag_ipcb(struct sk_buff + /* Copy the flags to each fragment. */ + IPCB(to)->flags = IPCB(from)->flags; + +- if (IPCB(from)->flags & IPSKB_FRAG_PMTU) +- state->iph->frag_off |= htons(IP_DF); +- + /* ANK: dirty, but effective trick. 
Upgrade options only if + * the segment to be fragmented was THE FIRST (otherwise, + * options are already fixed) and make it ONCE +@@ -738,6 +736,8 @@ struct sk_buff *ip_frag_next(struct sk_b + */ + iph = ip_hdr(skb2); + iph->frag_off = htons((state->offset >> 3)); ++ if (state->DF) ++ iph->frag_off |= htons(IP_DF); + + /* + * Added AC : If we are fragmenting a fragment that's not the +@@ -881,7 +881,8 @@ slow_path: + * Fragment the datagram. + */ + +- ip_frag_init(skb, hlen, ll_rs, mtu, &state); ++ ip_frag_init(skb, hlen, ll_rs, mtu, IPCB(skb)->flags & IPSKB_FRAG_PMTU, ++ &state); + + /* + * Keep copying data until we run out. diff --git a/queue-5.3/ipv4-fix-route-update-on-metric-change.patch b/queue-5.3/ipv4-fix-route-update-on-metric-change.patch new file mode 100644 index 00000000000..7e2bc363108 --- /dev/null +++ b/queue-5.3/ipv4-fix-route-update-on-metric-change.patch @@ -0,0 +1,65 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Paolo Abeni +Date: Sat, 26 Oct 2019 11:53:39 +0200 +Subject: ipv4: fix route update on metric change. + +From: Paolo Abeni + +[ Upstream commit 0b834ba00ab5337e938c727e216e1f5249794717 ] + +Since commit af4d768ad28c ("net/ipv4: Add support for specifying metric +of connected routes"), when updating an IP address with a different metric, +the associated connected route is updated, too. + +Still, the mentioned commit doesn't handle properly some corner cases: + +$ ip addr add dev eth0 192.168.1.0/24 +$ ip addr add dev eth0 192.168.2.1/32 peer 192.168.2.2 +$ ip addr add dev eth0 192.168.3.1/24 +$ ip addr change dev eth0 192.168.1.0/24 metric 10 +$ ip addr change dev eth0 192.168.2.1/32 peer 192.168.2.2 metric 10 +$ ip addr change dev eth0 192.168.3.1/24 metric 10 +$ ip -4 route +192.168.1.0/24 dev eth0 proto kernel scope link src 192.168.1.0 +192.168.2.2 dev eth0 proto kernel scope link src 192.168.2.1 +192.168.3.0/24 dev eth0 proto kernel scope link src 192.168.2.1 metric 10 + +Only the last route is correctly updated. 
+ +The problem is the current test in fib_modify_prefix_metric(): + + if (!(dev->flags & IFF_UP) || + ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) || + ipv4_is_zeronet(prefix) || + prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32) + +Which should be the logical 'not' of the pre-existing test in +fib_add_ifaddr(): + + if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && + (prefix != addr || ifa->ifa_prefixlen < 32)) + +To properly negate the original expression, we need to change the last +logical 'or' to a logical 'and'. + +Fixes: af4d768ad28c ("net/ipv4: Add support for specifying metric of connected routes") +Reported-and-suggested-by: Beniamino Galvani +Signed-off-by: Paolo Abeni +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_frontend.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -1147,7 +1147,7 @@ void fib_modify_prefix_metric(struct in_ + if (!(dev->flags & IFF_UP) || + ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) || + ipv4_is_zeronet(prefix) || +- prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32) ++ (prefix == ifa->ifa_local && ifa->ifa_prefixlen == 32)) + return; + + /* add the new */ diff --git a/queue-5.3/keys-fix-memory-leak-in-copy_net_ns.patch b/queue-5.3/keys-fix-memory-leak-in-copy_net_ns.patch new file mode 100644 index 00000000000..57e27f81920 --- /dev/null +++ b/queue-5.3/keys-fix-memory-leak-in-copy_net_ns.patch @@ -0,0 +1,59 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Takeshi Misawa +Date: Sat, 19 Oct 2019 15:34:43 +0900 +Subject: keys: Fix memory leak in copy_net_ns + +From: Takeshi Misawa + +[ Upstream commit 82ecff655e7968151b0047f1b5de03b249e5c1c4 ] + +If copy_net_ns() failed after net_alloc(), net->key_domain is leaked. +Fix this, by freeing key_domain in error path. 
+ +syzbot report: +BUG: memory leak +unreferenced object 0xffff8881175007e0 (size 32): + comm "syz-executor902", pid 7069, jiffies 4294944350 (age 28.400s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace: + [<00000000a83ed741>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] + [<00000000a83ed741>] slab_post_alloc_hook mm/slab.h:439 [inline] + [<00000000a83ed741>] slab_alloc mm/slab.c:3326 [inline] + [<00000000a83ed741>] kmem_cache_alloc_trace+0x13d/0x280 mm/slab.c:3553 + [<0000000059fc92b9>] kmalloc include/linux/slab.h:547 [inline] + [<0000000059fc92b9>] kzalloc include/linux/slab.h:742 [inline] + [<0000000059fc92b9>] net_alloc net/core/net_namespace.c:398 [inline] + [<0000000059fc92b9>] copy_net_ns+0xb2/0x220 net/core/net_namespace.c:445 + [<00000000a9d74bbc>] create_new_namespaces+0x141/0x2a0 kernel/nsproxy.c:103 + [<000000008047d645>] unshare_nsproxy_namespaces+0x7f/0x100 kernel/nsproxy.c:202 + [<000000005993ea6e>] ksys_unshare+0x236/0x490 kernel/fork.c:2674 + [<0000000019417e75>] __do_sys_unshare kernel/fork.c:2742 [inline] + [<0000000019417e75>] __se_sys_unshare kernel/fork.c:2740 [inline] + [<0000000019417e75>] __x64_sys_unshare+0x16/0x20 kernel/fork.c:2740 + [<00000000f4c5f2c8>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:296 + [<0000000038550184>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +syzbot also reported other leak in copy_net_ns -> setup_net. +This problem is already fixed by cf47a0b882a4e5f6b34c7949d7b293e9287f1972. + +Fixes: 9b242610514f ("keys: Network namespace domain tag") +Reported-and-tested-by: syzbot+3b3296d032353c33184b@syzkaller.appspotmail.com +Signed-off-by: Takeshi Misawa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/net_namespace.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/net_namespace.c ++++ b/net/core/net_namespace.c +@@ -478,6 +478,7 @@ struct net *copy_net_ns(unsigned long fl + + if (rv < 0) { + put_userns: ++ key_remove_domain(net->key_domain); + put_user_ns(user_ns); + net_drop_ns(net); + dec_ucounts: diff --git a/queue-5.3/mlxsw-core-unpublish-devlink-parameters-during-reload.patch b/queue-5.3/mlxsw-core-unpublish-devlink-parameters-during-reload.patch new file mode 100644 index 00000000000..fd1854da61a --- /dev/null +++ b/queue-5.3/mlxsw-core-unpublish-devlink-parameters-during-reload.patch @@ -0,0 +1,53 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Jiri Pirko +Date: Wed, 30 Oct 2019 11:04:22 +0200 +Subject: mlxsw: core: Unpublish devlink parameters during reload + +From: Jiri Pirko + +[ Upstream commit b7265a0df82c1716bf788096217083ed65a8bb14 ] + +The devlink parameter "acl_region_rehash_interval" is a runtime +parameter whose value is stored in a dynamically allocated memory. While +reloading the driver, this memory is freed and then allocated again. A +use-after-free might happen if during this time frame someone tries to +retrieve its value. + +Since commit 070c63f20f6c ("net: devlink: allow to change namespaces +during reload") the use-after-free can be reliably triggered when +reloading the driver into a namespace, as after freeing the memory (via +reload_down() callback) all the parameters are notified. + +Fix this by unpublishing and then re-publishing the parameters during +reload. + +Fixes: 98bbf70c1c41 ("mlxsw: spectrum: add "acl_region_rehash_interval" devlink param") +Fixes: 7c62cfb8c574 ("devlink: publish params only after driver init is done") +Signed-off-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/core.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c +@@ -1128,7 +1128,7 @@ __mlxsw_core_bus_device_register(const s + if (err) + goto err_thermal_init; + +- if (mlxsw_driver->params_register && !reload) ++ if (mlxsw_driver->params_register) + devlink_params_publish(devlink); + + return 0; +@@ -1201,7 +1201,7 @@ void mlxsw_core_bus_device_unregister(st + return; + } + +- if (mlxsw_core->driver->params_unregister && !reload) ++ if (mlxsw_core->driver->params_unregister) + devlink_params_unpublish(devlink); + mlxsw_thermal_fini(mlxsw_core->thermal); + mlxsw_hwmon_fini(mlxsw_core->hwmon); diff --git a/queue-5.3/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch b/queue-5.3/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch new file mode 100644 index 00000000000..41657ac0988 --- /dev/null +++ b/queue-5.3/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch @@ -0,0 +1,79 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Wed, 23 Oct 2019 22:44:52 -0700 +Subject: net: add READ_ONCE() annotation in __skb_wait_for_more_packets() + +From: Eric Dumazet + +[ Upstream commit 7c422d0ce97552dde4a97e6290de70ec6efb0fc6 ] + +__skb_wait_for_more_packets() can be called while other cpus +can feed packets to the socket receive queue. 
+ +KCSAN reported : + +BUG: KCSAN: data-race in __skb_wait_for_more_packets / __udp_enqueue_schedule_skb + +write to 0xffff888102e40b58 of 8 bytes by interrupt on cpu 0: + __skb_insert include/linux/skbuff.h:1852 [inline] + __skb_queue_before include/linux/skbuff.h:1958 [inline] + __skb_queue_tail include/linux/skbuff.h:1991 [inline] + __udp_enqueue_schedule_skb+0x2d7/0x410 net/ipv4/udp.c:1470 + __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline] + udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057 + udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074 + udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233 + __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300 + udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470 + ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 + ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 + dst_input include/net/dst.h:442 [inline] + ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 + __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 + __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 + process_backlog+0x1d3/0x420 net/core/dev.c:5955 + +read to 0xffff888102e40b58 of 8 bytes by task 13035 on cpu 1: + __skb_wait_for_more_packets+0xfa/0x320 net/core/datagram.c:100 + __skb_recv_udp+0x374/0x500 net/ipv4/udp.c:1683 + udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712 + inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 + sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871 + ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 + do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 + __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 + __do_sys_recvmmsg net/socket.c:2703 [inline] + __se_sys_recvmmsg net/socket.c:2696 [inline] + __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 + 
do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 13035 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/datagram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/datagram.c ++++ b/net/core/datagram.c +@@ -97,7 +97,7 @@ int __skb_wait_for_more_packets(struct s + if (error) + goto out_err; + +- if (sk->sk_receive_queue.prev != skb) ++ if (READ_ONCE(sk->sk_receive_queue.prev) != skb) + goto out; + + /* Socket shut down? */ diff --git a/queue-5.3/net-add-skb_queue_empty_lockless.patch b/queue-5.3/net-add-skb_queue_empty_lockless.patch new file mode 100644 index 00000000000..70129c0f103 --- /dev/null +++ b/queue-5.3/net-add-skb_queue_empty_lockless.patch @@ -0,0 +1,93 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Wed, 23 Oct 2019 22:44:48 -0700 +Subject: net: add skb_queue_empty_lockless() + +From: Eric Dumazet + +[ Upstream commit d7d16a89350ab263484c0aa2b523dd3a234e4a80 ] + +Some paths call skb_queue_empty() without holding +the queue lock. We must use a barrier in order +to not let the compiler do strange things, and avoid +KCSAN splats. + +Adding a barrier in skb_queue_empty() might be overkill, +I prefer adding a new helper to clearly identify +points where the callers might be lockless. This might +help us finding real bugs. + +The corresponding WRITE_ONCE() should add zero cost +for current compilers. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 33 ++++++++++++++++++++++++--------- + 1 file changed, 24 insertions(+), 9 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -1501,6 +1501,19 @@ static inline int skb_queue_empty(const + } + + /** ++ * skb_queue_empty_lockless - check if a queue is empty ++ * @list: queue head ++ * ++ * Returns true if the queue is empty, false otherwise. ++ * This variant can be used in lockless contexts. ++ */ ++static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list) ++{ ++ return READ_ONCE(list->next) == (const struct sk_buff *) list; ++} ++ ++ ++/** + * skb_queue_is_last - check if skb is the last entry in the queue + * @list: queue head + * @skb: buffer +@@ -1853,9 +1866,11 @@ static inline void __skb_insert(struct s + struct sk_buff *prev, struct sk_buff *next, + struct sk_buff_head *list) + { +- newsk->next = next; +- newsk->prev = prev; +- next->prev = prev->next = newsk; ++ /* see skb_queue_empty_lockless() for the opposite READ_ONCE() */ ++ WRITE_ONCE(newsk->next, next); ++ WRITE_ONCE(newsk->prev, prev); ++ WRITE_ONCE(next->prev, newsk); ++ WRITE_ONCE(prev->next, newsk); + list->qlen++; + } + +@@ -1866,11 +1881,11 @@ static inline void __skb_queue_splice(co + struct sk_buff *first = list->next; + struct sk_buff *last = list->prev; + +- first->prev = prev; +- prev->next = first; ++ WRITE_ONCE(first->prev, prev); ++ WRITE_ONCE(prev->next, first); + +- last->next = next; +- next->prev = last; ++ WRITE_ONCE(last->next, next); ++ WRITE_ONCE(next->prev, last); + } + + /** +@@ -2011,8 +2026,8 @@ static inline void __skb_unlink(struct s + next = skb->next; + prev = skb->prev; + skb->next = skb->prev = NULL; +- next->prev = prev; +- prev->next = next; ++ WRITE_ONCE(next->prev, prev); ++ WRITE_ONCE(prev->next, next); + } + + /** diff --git a/queue-5.3/net-annotate-accesses-to-sk-sk_incoming_cpu.patch 
b/queue-5.3/net-annotate-accesses-to-sk-sk_incoming_cpu.patch new file mode 100644 index 00000000000..a73b51352d8 --- /dev/null +++ b/queue-5.3/net-annotate-accesses-to-sk-sk_incoming_cpu.patch @@ -0,0 +1,158 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Wed, 30 Oct 2019 13:00:04 -0700 +Subject: net: annotate accesses to sk->sk_incoming_cpu + +From: Eric Dumazet + +[ Upstream commit 7170a977743b72cf3eb46ef6ef89885dc7ad3621 ] + +This socket field can be read and written by concurrent cpus. + +Use READ_ONCE() and WRITE_ONCE() annotations to document this, +and avoid some compiler 'optimizations'. + +KCSAN reported : + +BUG: KCSAN: data-race in tcp_v4_rcv / tcp_v4_rcv + +write to 0xffff88812220763c of 4 bytes by interrupt on cpu 0: + sk_incoming_cpu_update include/net/sock.h:953 [inline] + tcp_v4_rcv+0x1b3c/0x1bb0 net/ipv4/tcp_ipv4.c:1934 + ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 + ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 + dst_input include/net/dst.h:442 [inline] + ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 + __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 + __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 + process_backlog+0x1d3/0x420 net/core/dev.c:5955 + napi_poll net/core/dev.c:6392 [inline] + net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 + __do_softirq+0x115/0x33f kernel/softirq.c:292 + do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082 + do_softirq.part.0+0x6b/0x80 kernel/softirq.c:337 + do_softirq kernel/softirq.c:329 [inline] + __local_bh_enable_ip+0x76/0x80 kernel/softirq.c:189 + +read to 0xffff88812220763c of 4 bytes by interrupt on cpu 1: + sk_incoming_cpu_update 
include/net/sock.h:952 [inline] + tcp_v4_rcv+0x181a/0x1bb0 net/ipv4/tcp_ipv4.c:1934 + ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 + ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 + dst_input include/net/dst.h:442 [inline] + ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 + __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 + __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 + process_backlog+0x1d3/0x420 net/core/dev.c:5955 + napi_poll net/core/dev.c:6392 [inline] + net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 + __do_softirq+0x115/0x33f kernel/softirq.c:292 + run_ksoftirqd+0x46/0x60 kernel/softirq.c:603 + smpboot_thread_fn+0x37d/0x4a0 kernel/smpboot.c:165 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.4.0-rc3+ #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 4 ++-- + net/core/sock.c | 4 ++-- + net/ipv4/inet_hashtables.c | 2 +- + net/ipv4/udp.c | 2 +- + net/ipv6/inet6_hashtables.c | 2 +- + net/ipv6/udp.c | 2 +- + 6 files changed, 8 insertions(+), 8 deletions(-) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -949,8 +949,8 @@ static inline void sk_incoming_cpu_updat + { + int cpu = raw_smp_processor_id(); + +- if (unlikely(sk->sk_incoming_cpu != cpu)) +- sk->sk_incoming_cpu = cpu; ++ if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu)) ++ WRITE_ONCE(sk->sk_incoming_cpu, cpu); + } + + static inline void sock_rps_record_flow_hash(__u32 hash) +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1125,7 +1125,7 @@ set_rcvbuf: + break; + } + case SO_INCOMING_CPU: +- sk->sk_incoming_cpu = val; ++ WRITE_ONCE(sk->sk_incoming_cpu, val); + break; + + case SO_CNX_ADVICE: +@@ -1474,7 +1474,7 @@ int sock_getsockopt(struct socket *sock, + break; + + case SO_INCOMING_CPU: +- v.val = sk->sk_incoming_cpu; ++ v.val = READ_ONCE(sk->sk_incoming_cpu); + break; + + case SO_MEMINFO: +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -240,7 +240,7 @@ static inline int compute_score(struct s + return -1; + + score = sk->sk_family == PF_INET ? 
2 : 1; +- if (sk->sk_incoming_cpu == raw_smp_processor_id()) ++ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) + score++; + } + return score; +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -388,7 +388,7 @@ static int compute_score(struct sock *sk + return -1; + score += 4; + +- if (sk->sk_incoming_cpu == raw_smp_processor_id()) ++ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) + score++; + return score; + } +--- a/net/ipv6/inet6_hashtables.c ++++ b/net/ipv6/inet6_hashtables.c +@@ -105,7 +105,7 @@ static inline int compute_score(struct s + return -1; + + score = 1; +- if (sk->sk_incoming_cpu == raw_smp_processor_id()) ++ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) + score++; + } + return score; +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -135,7 +135,7 @@ static int compute_score(struct sock *sk + return -1; + score++; + +- if (sk->sk_incoming_cpu == raw_smp_processor_id()) ++ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) + score++; + + return score; diff --git a/queue-5.3/net-annotate-lockless-accesses-to-sk-sk_napi_id.patch b/queue-5.3/net-annotate-lockless-accesses-to-sk-sk_napi_id.patch new file mode 100644 index 00000000000..afcb3d59bec --- /dev/null +++ b/queue-5.3/net-annotate-lockless-accesses-to-sk-sk_napi_id.patch @@ -0,0 +1,98 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Tue, 29 Oct 2019 10:54:44 -0700 +Subject: net: annotate lockless accesses to sk->sk_napi_id + +From: Eric Dumazet + +[ Upstream commit ee8d153d46a3b98c064ee15c0c0a3bbf1450e5a1 ] + +We already annotated most accesses to sk->sk_napi_id + +We missed sk_mark_napi_id() and sk_mark_napi_id_once() +which might be called without socket lock held in UDP stack. 
+ +KCSAN reported : +BUG: KCSAN: data-race in udpv6_queue_rcv_one_skb / udpv6_queue_rcv_one_skb + +write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 0: + sk_mark_napi_id include/net/busy_poll.h:125 [inline] + __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline] + udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672 + udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689 + udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832 + __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913 + udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015 + ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409 + ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459 + dst_input include/net/dst.h:442 [inline] + ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284 + __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 + __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 + process_backlog+0x1d3/0x420 net/core/dev.c:5955 + napi_poll net/core/dev.c:6392 [inline] + net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 + +write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 1: + sk_mark_napi_id include/net/busy_poll.h:125 [inline] + __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline] + udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672 + udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689 + udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832 + __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913 + udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015 + ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409 + ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459 + dst_input 
include/net/dst.h:442 [inline] + ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284 + __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 + __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 + process_backlog+0x1d3/0x420 net/core/dev.c:5955 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 10890 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + +Fixes: e68b6e50fa35 ("udp: enable busy polling for all sockets") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/busy_poll.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/include/net/busy_poll.h ++++ b/include/net/busy_poll.h +@@ -122,7 +122,7 @@ static inline void skb_mark_napi_id(stru + static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) + { + #ifdef CONFIG_NET_RX_BUSY_POLL +- sk->sk_napi_id = skb->napi_id; ++ WRITE_ONCE(sk->sk_napi_id, skb->napi_id); + #endif + sk_rx_queue_set(sk, skb); + } +@@ -132,8 +132,8 @@ static inline void sk_mark_napi_id_once( + const struct sk_buff *skb) + { + #ifdef CONFIG_NET_RX_BUSY_POLL +- if (!sk->sk_napi_id) +- sk->sk_napi_id = skb->napi_id; ++ if (!READ_ONCE(sk->sk_napi_id)) ++ WRITE_ONCE(sk->sk_napi_id, skb->napi_id); + #endif + } + diff --git a/queue-5.3/net-bcmgenet-don-t-set-phydev-link-from-mac.patch b/queue-5.3/net-bcmgenet-don-t-set-phydev-link-from-mac.patch new file mode 100644 index 00000000000..5745bcd2815 --- /dev/null +++ b/queue-5.3/net-bcmgenet-don-t-set-phydev-link-from-mac.patch @@ -0,0 +1,44 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Doug Berger +Date: Wed, 16 Oct 2019 16:06:29 -0700 +Subject: net: bcmgenet: don't set phydev->link from MAC + +From: Doug Berger + +[ Upstream 
commit 7de48402faa32298c3551ea32c76ccb4f9d3025d ] + +When commit 28b2e0d2cd13 ("net: phy: remove parameter new_link from +phy_mac_interrupt()") removed the new_link parameter it set the +phydev->link state from the MAC before invoking phy_mac_interrupt(). + +However, once commit 88d6272acaaa ("net: phy: avoid unneeded MDIO +reads in genphy_read_status") was added this initialization prevents +the proper determination of the connection parameters by the function +genphy_read_status(). + +This commit removes that initialization to restore the proper +functionality. + +Fixes: 88d6272acaaa ("net: phy: avoid unneeded MDIO reads in genphy_read_status") +Signed-off-by: Doug Berger +Acked-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c +@@ -2617,10 +2617,8 @@ static void bcmgenet_irq_task(struct wor + spin_unlock_irq(&priv->lock); + + /* Link UP/DOWN event */ +- if (status & UMAC_IRQ_LINK_EVENT) { +- priv->dev->phydev->link = !!(status & UMAC_IRQ_LINK_UP); ++ if (status & UMAC_IRQ_LINK_EVENT) + phy_mac_interrupt(priv->dev->phydev); +- } + } + + /* bcmgenet_isr1: handle Rx and Tx priority queues */ diff --git a/queue-5.3/net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch b/queue-5.3/net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch new file mode 100644 index 00000000000..8b166312152 --- /dev/null +++ b/queue-5.3/net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch @@ -0,0 +1,65 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Doug Berger +Date: Wed, 16 Oct 2019 16:06:32 -0700 +Subject: net: bcmgenet: reset 40nm EPHY on energy detect + +From: Doug Berger + +[ Upstream commit 25382b991d252aed961cd434176240f9de6bb15f ] + +The EPHY integrated into the 40nm Set-Top Box devices can falsely +detect energy when 
connected to a disabled peer interface. When the +peer interface is enabled the EPHY will detect and report the link +as active, but on occasion may get into a state where it is not +able to exchange data with the connected GENET MAC. This issue has +not been observed when the link parameters are auto-negotiated; +however, it has been observed with a manually configured link. + +It has been empirically determined that issuing a soft reset to the +EPHY when energy is detected prevents it from getting into this bad +state. + +Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") +Signed-off-by: Doug Berger +Acked-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/genet/bcmgenet.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c +@@ -2018,6 +2018,8 @@ static void bcmgenet_link_intr_enable(st + */ + if (priv->internal_phy) { + int0_enable |= UMAC_IRQ_LINK_EVENT; ++ if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) ++ int0_enable |= UMAC_IRQ_PHY_DET_R; + } else if (priv->ext_phy) { + int0_enable |= UMAC_IRQ_LINK_EVENT; + } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { +@@ -2616,9 +2618,14 @@ static void bcmgenet_irq_task(struct wor + priv->irq0_stat = 0; + spin_unlock_irq(&priv->lock); + ++ if (status & UMAC_IRQ_PHY_DET_R && ++ priv->dev->phydev->autoneg != AUTONEG_ENABLE) ++ phy_init_hw(priv->dev->phydev); ++ + /* Link UP/DOWN event */ + if (status & UMAC_IRQ_LINK_EVENT) + phy_mac_interrupt(priv->dev->phydev); ++ + } + + /* bcmgenet_isr1: handle Rx and Tx priority queues */ +@@ -2713,7 +2720,7 @@ static irqreturn_t bcmgenet_isr0(int irq + } + + /* all other interested interrupts handled in bottom half */ +- status &= UMAC_IRQ_LINK_EVENT; ++ status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_PHY_DET_R); + if (status) { + /* Save irq status for 
bottom-half processing. */ + spin_lock_irqsave(&priv->lock, flags); diff --git a/queue-5.3/net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch b/queue-5.3/net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch new file mode 100644 index 00000000000..6bfd93be812 --- /dev/null +++ b/queue-5.3/net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch @@ -0,0 +1,264 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Doug Berger +Date: Wed, 16 Oct 2019 16:06:31 -0700 +Subject: net: bcmgenet: soft reset 40nm EPHYs before MAC init + +From: Doug Berger + +[ Upstream commit 1f515486275a08a17a2c806b844cca18f7de5b34 ] + +It turns out that the "Workaround for putting the PHY in IDDQ mode" +used by the internal EPHYs on 40nm Set-Top Box chips when powering +down puts the interface to the GENET MAC in a state that can cause +subsequent MAC resets to be incomplete. + +Rather than restore the forced soft reset when powering up internal +PHYs, this commit moves the invocation of phy_init_hw earlier in +the MAC initialization sequence to just before the MAC reset in the +open and resume functions. This allows the interface to be stable +and allows the MAC resets to be successful. + +The bcmgenet_mii_probe() function is split in two to accommodate +this. The new function bcmgenet_mii_connect() handles the first +half of the functionality before the MAC initialization, and the +bcmgenet_mii_config() function is extended to provide the remaining +PHY configuration following the MAC initialization. + +Fixes: 484bfa1507bf ("Revert "net: bcmgenet: Software reset EPHY after power on"") +Signed-off-by: Doug Berger +Acked-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/genet/bcmgenet.c | 28 +++--- + drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 + drivers/net/ethernet/broadcom/genet/bcmmii.c | 112 +++++++++++-------------- + 3 files changed, 69 insertions(+), 73 deletions(-) + +--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c +@@ -2877,6 +2877,12 @@ static int bcmgenet_open(struct net_devi + if (priv->internal_phy) + bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + ++ ret = bcmgenet_mii_connect(dev); ++ if (ret) { ++ netdev_err(dev, "failed to connect to PHY\n"); ++ goto err_clk_disable; ++ } ++ + /* take MAC out of reset */ + bcmgenet_umac_reset(priv); + +@@ -2886,6 +2892,12 @@ static int bcmgenet_open(struct net_devi + reg = bcmgenet_umac_readl(priv, UMAC_CMD); + priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + ++ ret = bcmgenet_mii_config(dev, true); ++ if (ret) { ++ netdev_err(dev, "unsupported PHY\n"); ++ goto err_disconnect_phy; ++ } ++ + bcmgenet_set_hw_addr(priv, dev->dev_addr); + + if (priv->internal_phy) { +@@ -2901,7 +2913,7 @@ static int bcmgenet_open(struct net_devi + ret = bcmgenet_init_dma(priv); + if (ret) { + netdev_err(dev, "failed to initialize DMA\n"); +- goto err_clk_disable; ++ goto err_disconnect_phy; + } + + /* Always enable ring 16 - descriptor ring */ +@@ -2924,25 +2936,19 @@ static int bcmgenet_open(struct net_devi + goto err_irq0; + } + +- ret = bcmgenet_mii_probe(dev); +- if (ret) { +- netdev_err(dev, "failed to connect to PHY\n"); +- goto err_irq1; +- } +- + bcmgenet_netif_start(dev); + + netif_tx_start_all_queues(dev); + + return 0; + +-err_irq1: +- free_irq(priv->irq1, priv); + err_irq0: + free_irq(priv->irq0, priv); + err_fini_dma: + bcmgenet_dma_teardown(priv); + bcmgenet_fini_dma(priv); ++err_disconnect_phy: ++ phy_disconnect(dev->phydev); + err_clk_disable: + if (priv->internal_phy) + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); +@@ -3625,6 +3631,8 @@ static 
int bcmgenet_resume(struct device + if (priv->internal_phy) + bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + ++ phy_init_hw(dev->phydev); ++ + bcmgenet_umac_reset(priv); + + init_umac(priv); +@@ -3633,8 +3641,6 @@ static int bcmgenet_resume(struct device + if (priv->wolopts) + clk_disable_unprepare(priv->clk_wol); + +- phy_init_hw(dev->phydev); +- + /* Speed settings must be restored */ + bcmgenet_mii_config(priv->dev, false); + +--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h ++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h +@@ -720,8 +720,8 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF); + + /* MDIO routines */ + int bcmgenet_mii_init(struct net_device *dev); ++int bcmgenet_mii_connect(struct net_device *dev); + int bcmgenet_mii_config(struct net_device *dev, bool init); +-int bcmgenet_mii_probe(struct net_device *dev); + void bcmgenet_mii_exit(struct net_device *dev); + void bcmgenet_phy_power_set(struct net_device *dev, bool enable); + void bcmgenet_mii_setup(struct net_device *dev); +--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c +@@ -173,6 +173,46 @@ static void bcmgenet_moca_phy_setup(stru + bcmgenet_fixed_phy_link_update); + } + ++int bcmgenet_mii_connect(struct net_device *dev) ++{ ++ struct bcmgenet_priv *priv = netdev_priv(dev); ++ struct device_node *dn = priv->pdev->dev.of_node; ++ struct phy_device *phydev; ++ u32 phy_flags = 0; ++ int ret; ++ ++ /* Communicate the integrated PHY revision */ ++ if (priv->internal_phy) ++ phy_flags = priv->gphy_rev; ++ ++ /* Initialize link state variables that bcmgenet_mii_setup() uses */ ++ priv->old_link = -1; ++ priv->old_speed = -1; ++ priv->old_duplex = -1; ++ priv->old_pause = -1; ++ ++ if (dn) { ++ phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, ++ phy_flags, priv->phy_interface); ++ if (!phydev) { ++ pr_err("could not attach to PHY\n"); ++ return -ENODEV; ++ } ++ } else { ++ phydev = dev->phydev; ++ phydev->dev_flags = phy_flags; ++ ++ 
ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, ++ priv->phy_interface); ++ if (ret) { ++ pr_err("could not attach to PHY\n"); ++ return -ENODEV; ++ } ++ } ++ ++ return 0; ++} ++ + int bcmgenet_mii_config(struct net_device *dev, bool init) + { + struct bcmgenet_priv *priv = netdev_priv(dev); +@@ -266,71 +306,21 @@ int bcmgenet_mii_config(struct net_devic + bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + } + +- if (init) +- dev_info(kdev, "configuring instance for %s\n", phy_name); +- +- return 0; +-} +- +-int bcmgenet_mii_probe(struct net_device *dev) +-{ +- struct bcmgenet_priv *priv = netdev_priv(dev); +- struct device_node *dn = priv->pdev->dev.of_node; +- struct phy_device *phydev; +- u32 phy_flags = 0; +- int ret; +- +- /* Communicate the integrated PHY revision */ +- if (priv->internal_phy) +- phy_flags = priv->gphy_rev; +- +- /* Initialize link state variables that bcmgenet_mii_setup() uses */ +- priv->old_link = -1; +- priv->old_speed = -1; +- priv->old_duplex = -1; +- priv->old_pause = -1; +- +- if (dn) { +- phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, +- phy_flags, priv->phy_interface); +- if (!phydev) { +- pr_err("could not attach to PHY\n"); +- return -ENODEV; +- } +- } else { +- phydev = dev->phydev; +- phydev->dev_flags = phy_flags; ++ if (init) { ++ linkmode_copy(phydev->advertising, phydev->supported); + +- ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, +- priv->phy_interface); +- if (ret) { +- pr_err("could not attach to PHY\n"); +- return -ENODEV; +- } +- } ++ /* The internal PHY has its link interrupts routed to the ++ * Ethernet MAC ISRs. On GENETv5 there is a hardware issue ++ * that prevents the signaling of link UP interrupts when ++ * the link operates at 10Mbps, so fallback to polling for ++ * those versions of GENET. 
++ */ ++ if (priv->internal_phy && !GENET_IS_V5(priv)) ++ phydev->irq = PHY_IGNORE_INTERRUPT; + +- /* Configure port multiplexer based on what the probed PHY device since +- * reading the 'max-speed' property determines the maximum supported +- * PHY speed which is needed for bcmgenet_mii_config() to configure +- * things appropriately. +- */ +- ret = bcmgenet_mii_config(dev, true); +- if (ret) { +- phy_disconnect(dev->phydev); +- return ret; ++ dev_info(kdev, "configuring instance for %s\n", phy_name); + } + +- linkmode_copy(phydev->advertising, phydev->supported); +- +- /* The internal PHY has its link interrupts routed to the +- * Ethernet MAC ISRs. On GENETv5 there is a hardware issue +- * that prevents the signaling of link UP interrupts when +- * the link operates at 10Mbps, so fallback to polling for +- * those versions of GENET. +- */ +- if (priv->internal_phy && !GENET_IS_V5(priv)) +- dev->phydev->irq = PHY_IGNORE_INTERRUPT; +- + return 0; + } + diff --git a/queue-5.3/net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch b/queue-5.3/net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch new file mode 100644 index 00000000000..e91ff9a0392 --- /dev/null +++ b/queue-5.3/net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch @@ -0,0 +1,33 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Florian Fainelli +Date: Sat, 5 Oct 2019 15:05:18 -0700 +Subject: net: dsa: b53: Do not clear existing mirrored port mask + +From: Florian Fainelli + +[ Upstream commit c763ac436b668d7417f0979430ec0312ede4093d ] + +Clearing the existing bitmask of mirrored ports essentially prevents us +from capturing more than one port at any given time. This is clearly +wrong, do not clear the bitmask prior to setting up the new port. + +Reported-by: Hubert Feurstein +Fixes: ed3af5fd08eb ("net: dsa: b53: Add support for port mirroring") +Signed-off-by: Florian Fainelli +Reviewed-by: Vivien Didelot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/b53/b53_common.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/net/dsa/b53/b53_common.c ++++ b/drivers/net/dsa/b53/b53_common.c +@@ -1808,7 +1808,6 @@ int b53_mirror_add(struct dsa_switch *ds + loc = B53_EG_MIR_CTL; + + b53_read16(dev, B53_MGMT_PAGE, loc, &reg); +- reg &= ~MIRROR_MASK; + reg |= BIT(port); + b53_write16(dev, B53_MGMT_PAGE, loc, reg); + diff --git a/queue-5.3/net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch b/queue-5.3/net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch new file mode 100644 index 00000000000..6a30fe23226 --- /dev/null +++ b/queue-5.3/net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch @@ -0,0 +1,80 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Florian Fainelli +Date: Thu, 31 Oct 2019 15:54:05 -0700 +Subject: net: dsa: bcm_sf2: Fix IMP setup for port different than 8 + +From: Florian Fainelli + +[ Upstream commit 5fc0f21246e50afdf318b5a3a941f7f4f57b8947 ] + +Since it became possible for the DSA core to use a CPU port different +than 8, our bcm_sf2_imp_setup() function was broken because it assumes +that registers are applicable to port 8. In particular, the port's MAC +is going to stay disabled, so make sure we clear the RX_DIS and TX_DIS +bits if we are not configured for port 8. + +Fixes: 9f91484f6fcc ("net: dsa: make "label" property optional for dsa2") +Signed-off-by: Florian Fainelli +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 36 +++++++++++++++++++++--------------- + 1 file changed, 21 insertions(+), 15 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -37,22 +37,11 @@ static void bcm_sf2_imp_setup(struct dsa + unsigned int i; + u32 reg, offset; + +- if (priv->type == BCM7445_DEVICE_ID) +- offset = CORE_STS_OVERRIDE_IMP; +- else +- offset = CORE_STS_OVERRIDE_IMP2; +- + /* Enable the port memories */ + reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL); + reg &= ~P_TXQ_PSM_VDD(port); + core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL); + +- /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ +- reg = core_readl(priv, CORE_IMP_CTL); +- reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN); +- reg &= ~(RX_DIS | TX_DIS); +- core_writel(priv, reg, CORE_IMP_CTL); +- + /* Enable forwarding */ + core_writel(priv, SW_FWDG_EN, CORE_SWMODE); + +@@ -71,10 +60,27 @@ static void bcm_sf2_imp_setup(struct dsa + + b53_brcm_hdr_setup(ds, port); + +- /* Force link status for IMP port */ +- reg = core_readl(priv, offset); +- reg |= (MII_SW_OR | LINK_STS); +- core_writel(priv, reg, offset); ++ if (port == 8) { ++ if (priv->type == BCM7445_DEVICE_ID) ++ offset = CORE_STS_OVERRIDE_IMP; ++ else ++ offset = CORE_STS_OVERRIDE_IMP2; ++ ++ /* Force link status for IMP port */ ++ reg = core_readl(priv, offset); ++ reg |= (MII_SW_OR | LINK_STS); ++ core_writel(priv, reg, offset); ++ ++ /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ ++ reg = core_readl(priv, CORE_IMP_CTL); ++ reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN); ++ reg &= ~(RX_DIS | TX_DIS); ++ core_writel(priv, reg, CORE_IMP_CTL); ++ } else { ++ reg = core_readl(priv, CORE_G_PCTL_PORT(port)); ++ reg &= ~(RX_DIS | TX_DIS); ++ core_writel(priv, reg, CORE_G_PCTL_PORT(port)); ++ } + } + + static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable) diff --git a/queue-5.3/net-dsa-fix-switch-tree-list.patch 
b/queue-5.3/net-dsa-fix-switch-tree-list.patch new file mode 100644 index 00000000000..306f04b52e6 --- /dev/null +++ b/queue-5.3/net-dsa-fix-switch-tree-list.patch @@ -0,0 +1,32 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Vivien Didelot +Date: Fri, 18 Oct 2019 17:02:46 -0400 +Subject: net: dsa: fix switch tree list + +From: Vivien Didelot + +[ Upstream commit 50c7d2ba9de20f60a2d527ad6928209ef67e4cdd ] + +If there are multiple switch trees on the device, only the last one +will be listed, because the arguments of list_add_tail are swapped. + +Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation") +Signed-off-by: Vivien Didelot +Reviewed-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/dsa2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/dsa/dsa2.c ++++ b/net/dsa/dsa2.c +@@ -46,7 +46,7 @@ static struct dsa_switch_tree *dsa_tree_ + dst->index = index; + + INIT_LIST_HEAD(&dst->list); +- list_add_tail(&dsa_tree_list, &dst->list); ++ list_add_tail(&dst->list, &dsa_tree_list); + + kref_init(&dst->refcount); + diff --git a/queue-5.3/net-ensure-correct-skb-tstamp-in-various-fragmenters.patch b/queue-5.3/net-ensure-correct-skb-tstamp-in-various-fragmenters.patch new file mode 100644 index 00000000000..88944e55f1f --- /dev/null +++ b/queue-5.3/net-ensure-correct-skb-tstamp-in-various-fragmenters.patch @@ -0,0 +1,140 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Wed, 16 Oct 2019 18:00:56 -0700 +Subject: net: ensure correct skb->tstamp in various fragmenters + +From: Eric Dumazet + +[ Upstream commit 9669fffc1415bb0c30e5d2ec98a8e1c3a418cb9c ] + +Thomas found that some forwarded packets would be stuck +in FQ packet scheduler because their skb->tstamp contained +timestamps far in the future. 
+ +We thought we addressed this point in commit 8203e2d844d3 +("net: clear skb->tstamp in forwarding paths") but there +is still an issue when/if a packet needs to be fragmented. + +In order to meet EDT requirements, we have to make sure all +fragments get the original skb->tstamp. + +Note that this original skb->tstamp should be zero in +forwarding path, but might have a non zero value in +output path if user decided so. + +Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") +Signed-off-by: Eric Dumazet +Reported-by: Thomas Bartschies +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/netfilter/nf_conntrack_bridge.c | 3 +++ + net/ipv4/ip_output.c | 3 +++ + net/ipv6/ip6_output.c | 3 +++ + net/ipv6/netfilter.c | 3 +++ + 4 files changed, 12 insertions(+) + +--- a/net/bridge/netfilter/nf_conntrack_bridge.c ++++ b/net/bridge/netfilter/nf_conntrack_bridge.c +@@ -34,6 +34,7 @@ static int nf_br_ip_fragment(struct net + { + int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; + unsigned int hlen, ll_rs, mtu; ++ ktime_t tstamp = skb->tstamp; + struct ip_frag_state state; + struct iphdr *iph; + int err; +@@ -81,6 +82,7 @@ static int nf_br_ip_fragment(struct net + if (iter.frag) + ip_fraglist_prepare(skb, &iter); + ++ skb->tstamp = tstamp; + err = output(net, sk, data, skb); + if (err || !iter.frag) + break; +@@ -105,6 +107,7 @@ slow_path: + goto blackhole; + } + ++ skb2->tstamp = tstamp; + err = output(net, sk, data, skb2); + if (err) + goto blackhole; +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -771,6 +771,7 @@ int ip_do_fragment(struct net *net, stru + struct rtable *rt = skb_rtable(skb); + unsigned int mtu, hlen, ll_rs; + struct ip_fraglist_iter iter; ++ ktime_t tstamp = skb->tstamp; + struct ip_frag_state state; + int err = 0; + +@@ -846,6 +847,7 @@ int ip_do_fragment(struct net *net, stru + ip_fraglist_prepare(skb, &iter); + } + ++ skb->tstamp = tstamp; + err = output(net, sk, skb); + + if (!err) +@@ -901,6 
+903,7 @@ slow_path: + /* + * Put this fragment into the sending queue. + */ ++ skb2->tstamp = tstamp; + err = output(net, sk, skb2); + if (err) + goto fail; +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -768,6 +768,7 @@ int ip6_fragment(struct net *net, struct + inet6_sk(skb->sk) : NULL; + struct ip6_frag_state state; + unsigned int mtu, hlen, nexthdr_offset; ++ ktime_t tstamp = skb->tstamp; + int hroom, err = 0; + __be32 frag_id; + u8 *prevhdr, nexthdr = 0; +@@ -855,6 +856,7 @@ int ip6_fragment(struct net *net, struct + if (iter.frag) + ip6_fraglist_prepare(skb, &iter); + ++ skb->tstamp = tstamp; + err = output(net, sk, skb); + if (!err) + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), +@@ -913,6 +915,7 @@ slow_path: + /* + * Put this fragment into the sending queue. + */ ++ frag->tstamp = tstamp; + err = output(net, sk, frag); + if (err) + goto fail; +--- a/net/ipv6/netfilter.c ++++ b/net/ipv6/netfilter.c +@@ -119,6 +119,7 @@ int br_ip6_fragment(struct net *net, str + struct sk_buff *)) + { + int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; ++ ktime_t tstamp = skb->tstamp; + struct ip6_frag_state state; + u8 *prevhdr, nexthdr = 0; + unsigned int mtu, hlen; +@@ -183,6 +184,7 @@ int br_ip6_fragment(struct net *net, str + if (iter.frag) + ip6_fraglist_prepare(skb, &iter); + ++ skb->tstamp = tstamp; + err = output(net, sk, data, skb); + if (err || !iter.frag) + break; +@@ -215,6 +217,7 @@ slow_path: + goto blackhole; + } + ++ skb2->tstamp = tstamp; + err = output(net, sk, data, skb2); + if (err) + goto blackhole; diff --git a/queue-5.3/net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch b/queue-5.3/net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch new file mode 100644 index 00000000000..df073850111 --- /dev/null +++ b/queue-5.3/net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch @@ -0,0 +1,73 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Benjamin Herrenschmidt +Date: Fri, 25 Oct 
2019 13:47:24 +1100 +Subject: net: ethernet: ftgmac100: Fix DMA coherency issue with SW checksum + +From: Benjamin Herrenschmidt + +[ Upstream commit 88824e3bf29a2fcacfd9ebbfe03063649f0f3254 ] + +We are calling the checksum helper after the dma_map_single() +call to map the packet. This is incorrect as the checksumming +code will touch the packet from the CPU. This means the cache +won't be properly flushes (or the bounce buffering will leave +us with the unmodified packet to DMA). + +This moves the calculation of the checksum & vlan tags to +before the DMA mapping. + +This also has the side effect of fixing another bug: If the +checksum helper fails, we goto "drop" to drop the packet, which +will not unmap the DMA mapping. + +Signed-off-by: Benjamin Herrenschmidt +Fixes: 05690d633f30 ("ftgmac100: Upgrade to NETIF_F_HW_CSUM") +Reviewed-by: Vijay Khemka +Tested-by: Vijay Khemka +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/faraday/ftgmac100.c | 25 ++++++++++++------------- + 1 file changed, 12 insertions(+), 13 deletions(-) + +--- a/drivers/net/ethernet/faraday/ftgmac100.c ++++ b/drivers/net/ethernet/faraday/ftgmac100.c +@@ -726,6 +726,18 @@ static netdev_tx_t ftgmac100_hard_start_ + */ + nfrags = skb_shinfo(skb)->nr_frags; + ++ /* Setup HW checksumming */ ++ csum_vlan = 0; ++ if (skb->ip_summed == CHECKSUM_PARTIAL && ++ !ftgmac100_prep_tx_csum(skb, &csum_vlan)) ++ goto drop; ++ ++ /* Add VLAN tag */ ++ if (skb_vlan_tag_present(skb)) { ++ csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG; ++ csum_vlan |= skb_vlan_tag_get(skb) & 0xffff; ++ } ++ + /* Get header len */ + len = skb_headlen(skb); + +@@ -752,19 +764,6 @@ static netdev_tx_t ftgmac100_hard_start_ + if (nfrags == 0) + f_ctl_stat |= FTGMAC100_TXDES0_LTS; + txdes->txdes3 = cpu_to_le32(map); +- +- /* Setup HW checksumming */ +- csum_vlan = 0; +- if (skb->ip_summed == CHECKSUM_PARTIAL && +- !ftgmac100_prep_tx_csum(skb, &csum_vlan)) +- goto drop; +- +- /* Add VLAN tag */ 
+- if (skb_vlan_tag_present(skb)) { +- csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG; +- csum_vlan |= skb_vlan_tag_get(skb) & 0xffff; +- } +- + txdes->txdes1 = cpu_to_le32(csum_vlan); + + /* Next descriptor */ diff --git a/queue-5.3/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch b/queue-5.3/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch new file mode 100644 index 00000000000..9b92402e4a3 --- /dev/null +++ b/queue-5.3/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch @@ -0,0 +1,160 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Tejun Heo +Date: Thu, 24 Oct 2019 13:50:27 -0700 +Subject: net: fix sk_page_frag() recursion from memory reclaim + +From: Tejun Heo + +[ Upstream commit 20eb4f29b60286e0d6dc01d9c260b4bd383c58fb ] + +sk_page_frag() optimizes skb_frag allocations by using per-task +skb_frag cache when it knows it's the only user. The condition is +determined by seeing whether the socket allocation mask allows +blocking - if the allocation may block, it obviously owns the task's +context and ergo exclusively owns current->task_frag. + +Unfortunately, this misses recursion through memory reclaim path. +Please take a look at the following backtrace. + + [2] RIP: 0010:tcp_sendmsg_locked+0xccf/0xe10 + ... 
+ tcp_sendmsg+0x27/0x40 + sock_sendmsg+0x30/0x40 + sock_xmit.isra.24+0xa1/0x170 [nbd] + nbd_send_cmd+0x1d2/0x690 [nbd] + nbd_queue_rq+0x1b5/0x3b0 [nbd] + __blk_mq_try_issue_directly+0x108/0x1b0 + blk_mq_request_issue_directly+0xbd/0xe0 + blk_mq_try_issue_list_directly+0x41/0xb0 + blk_mq_sched_insert_requests+0xa2/0xe0 + blk_mq_flush_plug_list+0x205/0x2a0 + blk_flush_plug_list+0xc3/0xf0 + [1] blk_finish_plug+0x21/0x2e + _xfs_buf_ioapply+0x313/0x460 + __xfs_buf_submit+0x67/0x220 + xfs_buf_read_map+0x113/0x1a0 + xfs_trans_read_buf_map+0xbf/0x330 + xfs_btree_read_buf_block.constprop.42+0x95/0xd0 + xfs_btree_lookup_get_block+0x95/0x170 + xfs_btree_lookup+0xcc/0x470 + xfs_bmap_del_extent_real+0x254/0x9a0 + __xfs_bunmapi+0x45c/0xab0 + xfs_bunmapi+0x15/0x30 + xfs_itruncate_extents_flags+0xca/0x250 + xfs_free_eofblocks+0x181/0x1e0 + xfs_fs_destroy_inode+0xa8/0x1b0 + destroy_inode+0x38/0x70 + dispose_list+0x35/0x50 + prune_icache_sb+0x52/0x70 + super_cache_scan+0x120/0x1a0 + do_shrink_slab+0x120/0x290 + shrink_slab+0x216/0x2b0 + shrink_node+0x1b6/0x4a0 + do_try_to_free_pages+0xc6/0x370 + try_to_free_mem_cgroup_pages+0xe3/0x1e0 + try_charge+0x29e/0x790 + mem_cgroup_charge_skmem+0x6a/0x100 + __sk_mem_raise_allocated+0x18e/0x390 + __sk_mem_schedule+0x2a/0x40 + [0] tcp_sendmsg_locked+0x8eb/0xe10 + tcp_sendmsg+0x27/0x40 + sock_sendmsg+0x30/0x40 + ___sys_sendmsg+0x26d/0x2b0 + __sys_sendmsg+0x57/0xa0 + do_syscall_64+0x42/0x100 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +In [0], tcp_send_msg_locked() was using current->page_frag when it +called sk_wmem_schedule(). It already calculated how many bytes can +be fit into current->page_frag. Due to memory pressure, +sk_wmem_schedule() called into memory reclaim path which called into +xfs and then IO issue path. Because the filesystem in question is +backed by nbd, the control goes back into the tcp layer - back into +tcp_sendmsg_locked(). 
+ +nbd sets sk_allocation to (GFP_NOIO | __GFP_MEMALLOC) which makes +sense - it's in the process of freeing memory and wants to be able to, +e.g., drop clean pages to make forward progress. However, this +confused sk_page_frag() called from [2]. Because it only tests +whether the allocation allows blocking which it does, it now thinks +current->page_frag can be used again although it already was being +used in [0]. + +After [2] used current->page_frag, the offset would be increased by +the used amount. When the control returns to [0], +current->page_frag's offset is increased and the previously calculated +number of bytes now may overrun the end of allocated memory leading to +silent memory corruptions. + +Fix it by adding gfpflags_normal_context() which tests sleepable && +!reclaim and use it to determine whether to use current->task_frag. + +v2: Eric didn't like gfp flags being tested twice. Introduce a new + helper gfpflags_normal_context() and combine the two tests. + +Signed-off-by: Tejun Heo +Cc: Josef Bacik +Cc: Eric Dumazet +Cc: stable@vger.kernel.org +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/gfp.h | 23 +++++++++++++++++++++++ + include/net/sock.h | 11 ++++++++--- + 2 files changed, 31 insertions(+), 3 deletions(-) + +--- a/include/linux/gfp.h ++++ b/include/linux/gfp.h +@@ -325,6 +325,29 @@ static inline bool gfpflags_allow_blocki + return !!(gfp_flags & __GFP_DIRECT_RECLAIM); + } + ++/** ++ * gfpflags_normal_context - is gfp_flags a normal sleepable context? ++ * @gfp_flags: gfp_flags to test ++ * ++ * Test whether @gfp_flags indicates that the allocation is from the ++ * %current context and allowed to sleep. ++ * ++ * An allocation being allowed to block doesn't mean it owns the %current ++ * context. When direct reclaim path tries to allocate memory, the ++ * allocation context is nested inside whatever %current was doing at the ++ * time of the original allocation. 
The nested allocation may be allowed ++ * to block but modifying anything %current owns can corrupt the outer ++ * context's expectations. ++ * ++ * %true result from this function indicates that the allocation context ++ * can sleep and use anything that's associated with %current. ++ */ ++static inline bool gfpflags_normal_context(const gfp_t gfp_flags) ++{ ++ return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) == ++ __GFP_DIRECT_RECLAIM; ++} ++ + #ifdef CONFIG_HIGHMEM + #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM + #else +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -2233,12 +2233,17 @@ struct sk_buff *sk_stream_alloc_skb(stru + * sk_page_frag - return an appropriate page_frag + * @sk: socket + * +- * If socket allocation mode allows current thread to sleep, it means its +- * safe to use the per task page_frag instead of the per socket one. ++ * Use the per task page_frag instead of the per socket one for ++ * optimization when we know that we're in the normal context and owns ++ * everything that's associated with %current. ++ * ++ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest ++ * inside other socket operations and end up recursing into sk_page_frag() ++ * while it's already in use. 
+ */ + static inline struct page_frag *sk_page_frag(struct sock *sk) + { +- if (gfpflags_allow_blocking(sk->sk_allocation)) ++ if (gfpflags_normal_context(sk->sk_allocation)) + return &current->task_frag; + + return &sk->sk_frag; diff --git a/queue-5.3/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch b/queue-5.3/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch new file mode 100644 index 00000000000..e0cb04a3db9 --- /dev/null +++ b/queue-5.3/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch @@ -0,0 +1,76 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Jiangfeng Xiao +Date: Mon, 28 Oct 2019 13:09:46 +0800 +Subject: net: hisilicon: Fix ping latency when deal with high throughput + +From: Jiangfeng Xiao + +[ Upstream commit e56bd641ca61beb92b135298d5046905f920b734 ] + +This is due to error in over budget processing. +When dealing with high throughput, the used buffers +that exceeds the budget is not cleaned up. In addition, +it takes a lot of cycles to clean up the used buffer, +and then the buffer where the valid data is located can take effect. + +Signed-off-by: Jiangfeng Xiao +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/hisilicon/hip04_eth.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/hisilicon/hip04_eth.c ++++ b/drivers/net/ethernet/hisilicon/hip04_eth.c +@@ -237,6 +237,7 @@ struct hip04_priv { + dma_addr_t rx_phys[RX_DESC_NUM]; + unsigned int rx_head; + unsigned int rx_buf_size; ++ unsigned int rx_cnt_remaining; + + struct device_node *phy_node; + struct phy_device *phy; +@@ -575,7 +576,6 @@ static int hip04_rx_poll(struct napi_str + struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi); + struct net_device *ndev = priv->ndev; + struct net_device_stats *stats = &ndev->stats; +- unsigned int cnt = hip04_recv_cnt(priv); + struct rx_desc *desc; + struct sk_buff *skb; + unsigned char *buf; +@@ -588,8 +588,8 @@ static int hip04_rx_poll(struct napi_str + + /* clean up tx descriptors */ + tx_remaining = hip04_tx_reclaim(ndev, false); +- +- while (cnt && !last) { ++ priv->rx_cnt_remaining += hip04_recv_cnt(priv); ++ while (priv->rx_cnt_remaining && !last) { + buf = priv->rx_buf[priv->rx_head]; + skb = build_skb(buf, priv->rx_buf_size); + if (unlikely(!skb)) { +@@ -635,11 +635,13 @@ refill: + hip04_set_recv_desc(priv, phys); + + priv->rx_head = RX_NEXT(priv->rx_head); +- if (rx >= budget) ++ if (rx >= budget) { ++ --priv->rx_cnt_remaining; + goto done; ++ } + +- if (--cnt == 0) +- cnt = hip04_recv_cnt(priv); ++ if (--priv->rx_cnt_remaining == 0) ++ priv->rx_cnt_remaining += hip04_recv_cnt(priv); + } + + if (!(priv->reg_inten & RCV_INT)) { +@@ -724,6 +726,7 @@ static int hip04_mac_open(struct net_dev + int i; + + priv->rx_head = 0; ++ priv->rx_cnt_remaining = 0; + priv->tx_head = 0; + priv->tx_tail = 0; + hip04_reset_ppe(priv); diff --git a/queue-5.3/net-hns3-fix-mis-counting-irq-vector-numbers-issue.patch b/queue-5.3/net-hns3-fix-mis-counting-irq-vector-numbers-issue.patch new file mode 100644 index 00000000000..a66a61948f8 --- /dev/null +++ 
b/queue-5.3/net-hns3-fix-mis-counting-irq-vector-numbers-issue.patch @@ -0,0 +1,235 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Yonglong Liu +Date: Fri, 18 Oct 2019 11:42:59 +0800 +Subject: net: hns3: fix mis-counting IRQ vector numbers issue + +From: Yonglong Liu + +[ Upstream commit 580a05f9d4ada3bfb689140d0efec1efdb8a48da ] + +Currently, the num_msi_left means the vector numbers of NIC, +but if the PF supported RoCE, it contains the vector numbers +of NIC and RoCE(Not expected). + +This may cause interrupts lost in some case, because of the +NIC module used the vector resources which belongs to RoCE. + +This patch adds a new variable num_nic_msi to store the vector +numbers of NIC, and adjust the default TQP numbers and rss_size +according to the value of num_nic_msi. + +Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") +Signed-off-by: Yonglong Liu +Signed-off-by: Huazhong Tan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 + + drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 21 ++++++++++ + drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 1 + drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 11 ++++- + drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 28 ++++++++++++-- + drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 6 files changed, 58 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h ++++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h +@@ -32,6 +32,8 @@ + + #define HNAE3_MOD_VERSION "1.0" + ++#define HNAE3_MIN_VECTOR_NUM 2 /* first one for misc, another for IO */ ++ + /* Device IDs */ + #define HNAE3_DEV_ID_GE 0xA220 + #define HNAE3_DEV_ID_25GE 0xA221 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -800,6 +800,9 @@ static int hclge_query_pf_resource(struc + 
hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number), + HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); + ++ /* nic's msix numbers is always equals to the roce's. */ ++ hdev->num_nic_msi = hdev->num_roce_msi; ++ + /* PF should have NIC vectors and Roce vectors, + * NIC vectors are queued before Roce vectors. + */ +@@ -809,6 +812,15 @@ static int hclge_query_pf_resource(struc + hdev->num_msi = + hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number), + HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); ++ ++ hdev->num_nic_msi = hdev->num_msi; ++ } ++ ++ if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) { ++ dev_err(&hdev->pdev->dev, ++ "Just %u msi resources, not enough for pf(min:2).\n", ++ hdev->num_nic_msi); ++ return -EINVAL; + } + + return 0; +@@ -1394,6 +1406,10 @@ static int hclge_assign_tqp(struct hclg + kinfo->rss_size = min_t(u16, hdev->rss_size_max, + vport->alloc_tqps / hdev->tm_info.num_tc); + ++ /* ensure one to one mapping between irq and queue at default */ ++ kinfo->rss_size = min_t(u16, kinfo->rss_size, ++ (hdev->num_nic_msi - 1) / hdev->tm_info.num_tc); ++ + return 0; + } + +@@ -2172,7 +2188,8 @@ static int hclge_init_msi(struct hclge_d + int vectors; + int i; + +- vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, ++ vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM, ++ hdev->num_msi, + PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (vectors < 0) { + dev_err(&pdev->dev, +@@ -2187,6 +2204,7 @@ static int hclge_init_msi(struct hclge_d + + hdev->num_msi = vectors; + hdev->num_msi_left = vectors; ++ + hdev->base_msi_vector = pdev->irq; + hdev->roce_base_vector = hdev->base_msi_vector + + hdev->roce_base_msix_offset; +@@ -3644,6 +3662,7 @@ static int hclge_get_vector(struct hnae3 + int alloc = 0; + int i, j; + ++ vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num); + vector_num = min(hdev->num_msi_left, vector_num); + + for (j = 0; j < vector_num; j++) { +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h ++++ 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +@@ -795,6 +795,7 @@ struct hclge_dev { + u32 base_msi_vector; + u16 *vector_status; + int *vector_irq; ++ u16 num_nic_msi; /* Num of nic vectors for this PF */ + u16 num_roce_msi; /* Num of roce vectors for this PF */ + int roce_base_vector; + +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +@@ -540,9 +540,16 @@ static void hclge_tm_vport_tc_info_updat + kinfo->rss_size = kinfo->req_rss_size; + } else if (kinfo->rss_size > max_rss_size || + (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) { ++ /* if user not set rss, the rss_size should compare with the ++ * valid msi numbers to ensure one to one map between tqp and ++ * irq as default. ++ */ ++ if (!kinfo->req_rss_size) ++ max_rss_size = min_t(u16, max_rss_size, ++ (hdev->num_nic_msi - 1) / ++ kinfo->num_tc); ++ + /* Set to the maximum specification value (max_rss_size). */ +- dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", +- kinfo->rss_size, max_rss_size); + kinfo->rss_size = max_rss_size; + } + +--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +@@ -411,6 +411,13 @@ static int hclgevf_knic_setup(struct hcl + kinfo->tqp[i] = &hdev->htqp[i].q; + } + ++ /* after init the max rss_size and tqps, adjust the default tqp numbers ++ * and rss size with the actual vector numbers ++ */ ++ kinfo->num_tqps = min_t(u16, hdev->num_nic_msix - 1, kinfo->num_tqps); ++ kinfo->rss_size = min_t(u16, kinfo->num_tqps / kinfo->num_tc, ++ kinfo->rss_size); ++ + return 0; + } + +@@ -502,6 +509,7 @@ static int hclgevf_get_vector(struct hna + int alloc = 0; + int i, j; + ++ vector_num = min_t(u16, hdev->num_nic_msix - 1, vector_num); + vector_num = min(hdev->num_msi_left, vector_num); + + for (j = 0; j < vector_num; j++) { +@@ -2208,13 +2216,14 @@ static int hclgevf_init_msi(struct hclge + int vectors; + int i; 
+ +- if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) ++ if (hnae3_dev_roce_supported(hdev)) + vectors = pci_alloc_irq_vectors(pdev, + hdev->roce_base_msix_offset + 1, + hdev->num_msi, + PCI_IRQ_MSIX); + else +- vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, ++ vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM, ++ hdev->num_msi, + PCI_IRQ_MSI | PCI_IRQ_MSIX); + + if (vectors < 0) { +@@ -2230,6 +2239,7 @@ static int hclgevf_init_msi(struct hclge + + hdev->num_msi = vectors; + hdev->num_msi_left = vectors; ++ + hdev->base_msi_vector = pdev->irq; + hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset; + +@@ -2495,7 +2505,7 @@ static int hclgevf_query_vf_resource(str + + req = (struct hclgevf_query_res_cmd *)desc.data; + +- if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) { ++ if (hnae3_dev_roce_supported(hdev)) { + hdev->roce_base_msix_offset = + hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee), + HCLGEVF_MSIX_OFT_ROCEE_M, +@@ -2504,6 +2514,9 @@ static int hclgevf_query_vf_resource(str + hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number), + HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S); + ++ /* nic's msix numbers is always equals to the roce's. */ ++ hdev->num_nic_msix = hdev->num_roce_msix; ++ + /* VF should have NIC vectors and Roce vectors, NIC vectors + * are queued before Roce vectors. The offset is fixed to 64. 
+ */ +@@ -2513,6 +2526,15 @@ static int hclgevf_query_vf_resource(str + hdev->num_msi = + hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number), + HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S); ++ ++ hdev->num_nic_msix = hdev->num_msi; ++ } ++ ++ if (hdev->num_nic_msix < HNAE3_MIN_VECTOR_NUM) { ++ dev_err(&hdev->pdev->dev, ++ "Just %u msi resources, not enough for vf(min:2).\n", ++ hdev->num_nic_msix); ++ return -EINVAL; + } + + return 0; +--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +@@ -265,6 +265,7 @@ struct hclgevf_dev { + u16 num_msi; + u16 num_msi_left; + u16 num_msi_used; ++ u16 num_nic_msix; /* Num of nic vectors for this VF */ + u16 num_roce_msix; /* Num of roce vectors for this VF */ + u16 roce_base_msix_offset; + int roce_base_vector; diff --git a/queue-5.3/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch b/queue-5.3/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch new file mode 100644 index 00000000000..dd203470141 --- /dev/null +++ b/queue-5.3/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch @@ -0,0 +1,94 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eran Ben Elisha +Date: Sun, 27 Oct 2019 16:39:15 +0200 +Subject: net/mlx4_core: Dynamically set guaranteed amount of counters per VF + +From: Eran Ben Elisha + +[ Upstream commit e19868efea0c103f23b4b7e986fd0a703822111f ] + +Prior to this patch, the amount of counters guaranteed per VF in the +resource tracker was MLX4_VF_COUNTERS_PER_PORT * MLX4_MAX_PORTS. It was +set regardless if the VF was single or dual port. +This caused several VFs to have no guaranteed counters although the +system could satisfy their request. + +The fix is to dynamically guarantee counters, based on each VF +specification. 
+ +Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker") +Signed-off-by: Eran Ben Elisha +Signed-off-by: Jack Morgenstein +Signed-off-by: Tariq Toukan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 42 +++++++++++------- + 1 file changed, 26 insertions(+), 16 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c ++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +@@ -471,12 +471,31 @@ void mlx4_init_quotas(struct mlx4_dev *d + priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; + } + +-static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev) ++static int ++mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev, ++ struct resource_allocator *res_alloc, ++ int vf) + { +- /* reduce the sink counter */ +- return (dev->caps.max_counters - 1 - +- (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS)) +- / MLX4_MAX_PORTS; ++ struct mlx4_active_ports actv_ports; ++ int ports, counters_guaranteed; ++ ++ /* For master, only allocate according to the number of phys ports */ ++ if (vf == mlx4_master_func_num(dev)) ++ return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports; ++ ++ /* calculate real number of ports for the VF */ ++ actv_ports = mlx4_get_active_ports(dev, vf); ++ ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); ++ counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT; ++ ++ /* If we do not have enough counters for this VF, do not ++ * allocate any for it. '-1' to reduce the sink counter. 
++ */ ++ if ((res_alloc->res_reserved + counters_guaranteed) > ++ (dev->caps.max_counters - 1)) ++ return 0; ++ ++ return counters_guaranteed; + } + + int mlx4_init_resource_tracker(struct mlx4_dev *dev) +@@ -484,7 +503,6 @@ int mlx4_init_resource_tracker(struct ml + struct mlx4_priv *priv = mlx4_priv(dev); + int i, j; + int t; +- int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev); + + priv->mfunc.master.res_tracker.slave_list = + kcalloc(dev->num_slaves, sizeof(struct slave_list), +@@ -603,16 +621,8 @@ int mlx4_init_resource_tracker(struct ml + break; + case RES_COUNTER: + res_alloc->quota[t] = dev->caps.max_counters; +- if (t == mlx4_master_func_num(dev)) +- res_alloc->guaranteed[t] = +- MLX4_PF_COUNTERS_PER_PORT * +- MLX4_MAX_PORTS; +- else if (t <= max_vfs_guarantee_counter) +- res_alloc->guaranteed[t] = +- MLX4_VF_COUNTERS_PER_PORT * +- MLX4_MAX_PORTS; +- else +- res_alloc->guaranteed[t] = 0; ++ res_alloc->guaranteed[t] = ++ mlx4_calc_res_counter_guaranteed(dev, res_alloc, t); + break; + default: + break; diff --git a/queue-5.3/net-mlx5-fix-flow-counter-list-auto-bits-struct.patch b/queue-5.3/net-mlx5-fix-flow-counter-list-auto-bits-struct.patch new file mode 100644 index 00000000000..aa331430500 --- /dev/null +++ b/queue-5.3/net-mlx5-fix-flow-counter-list-auto-bits-struct.patch @@ -0,0 +1,37 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Roi Dayan +Date: Wed, 11 Sep 2019 14:44:50 +0300 +Subject: net/mlx5: Fix flow counter list auto bits struct + +From: Roi Dayan + +[ Upstream commit 6dfef396ea13873ae9066ee2e0ad6ee364031fe2 ] + +The union should contain the extended dest and counter list. +Remove the reserved 0x40 bits, which are redundant. +This change doesn't break any functionality. +Everything works today because the code in fs_cmd.c is using +the correct structs if extended dest or the basic dest. 
+ +Fixes: 1b115498598f ("net/mlx5: Introduce extended destination fields") +Signed-off-by: Roi Dayan +Reviewed-by: Mark Bloch +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mlx5/mlx5_ifc.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -1437,9 +1437,8 @@ struct mlx5_ifc_extended_dest_format_bit + }; + + union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits { +- struct mlx5_ifc_dest_format_struct_bits dest_format_struct; ++ struct mlx5_ifc_extended_dest_format_bits extended_dest_format; + struct mlx5_ifc_flow_counter_list_bits flow_counter_list; +- u8 reserved_at_0[0x40]; + }; + + struct mlx5_ifc_fte_match_param_bits { diff --git a/queue-5.3/net-mlx5-fix-rtable-reference-leak.patch b/queue-5.3/net-mlx5-fix-rtable-reference-leak.patch new file mode 100644 index 00000000000..e50638278c8 --- /dev/null +++ b/queue-5.3/net-mlx5-fix-rtable-reference-leak.patch @@ -0,0 +1,58 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Parav Pandit +Date: Thu, 19 Sep 2019 15:58:14 -0500 +Subject: net/mlx5: Fix rtable reference leak + +From: Parav Pandit + +[ Upstream commit 2347cee83b2bd868bde2d283db0fac89f22be4e0 ] + +If the rt entry gateway family is not AF_INET for multipath device, +rtable reference is leaked. +Hence, fix it by releasing the reference. 
+ +Fixes: 5fb091e8130b ("net/mlx5e: Use hint to resolve route when in HW multipath mode") +Fixes: e32ee6c78efa ("net/mlx5e: Support tunnel encap over tagged Ethernet") +Signed-off-by: Parav Pandit +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +@@ -90,15 +90,19 @@ static int mlx5e_route_lookup_ipv4(struc + if (ret) + return ret; + +- if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) ++ if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) { ++ ip_rt_put(rt); + return -ENETUNREACH; ++ } + #else + return -EOPNOTSUPP; + #endif + + ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev); +- if (ret < 0) ++ if (ret < 0) { ++ ip_rt_put(rt); + return ret; ++ } + + if (!(*out_ttl)) + *out_ttl = ip4_dst_hoplimit(&rt->dst); +@@ -142,8 +146,10 @@ static int mlx5e_route_lookup_ipv6(struc + *out_ttl = ip6_dst_hoplimit(dst); + + ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev); +- if (ret < 0) ++ if (ret < 0) { ++ dst_release(dst); + return ret; ++ } + #else + return -EOPNOTSUPP; + #endif diff --git a/queue-5.3/net-mlx5e-determine-source-port-properly-for-vlan-push-action.patch b/queue-5.3/net-mlx5e-determine-source-port-properly-for-vlan-push-action.patch new file mode 100644 index 00000000000..e1e9574c40f --- /dev/null +++ b/queue-5.3/net-mlx5e-determine-source-port-properly-for-vlan-push-action.patch @@ -0,0 +1,66 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Dmytro Linkin +Date: Wed, 4 Sep 2019 12:32:49 +0000 +Subject: net/mlx5e: Determine source port properly for vlan push action + +From: Dmytro Linkin + +[ Upstream commit d5dbcc4e87bc8444bd2f1ca4b8f787e1e5677ec2 ] + +Termination tables are used for vlan push actions on uplink ports. 
+To support RoCE dual port the source port value was placed in a register. +Fix the code to use an API method returning the source port according to +the FW capabilities. + +Fixes: 10caabdaad5a ("net/mlx5e: Use termination table for VLAN push actions") +Signed-off-by: Dmytro Linkin +Reviewed-by: Jianbo Liu +Reviewed-by: Oz Shlomo +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 22 +++++++--- + 1 file changed, 16 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +@@ -177,22 +177,32 @@ mlx5_eswitch_termtbl_actions_move(struct + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); + } + ++static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, ++ const struct mlx5_flow_spec *spec) ++{ ++ u32 port_mask, port_value; ++ ++ if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) ++ return spec->flow_context.flow_source == MLX5_VPORT_UPLINK; ++ ++ port_mask = MLX5_GET(fte_match_param, spec->match_criteria, ++ misc_parameters.source_port); ++ port_value = MLX5_GET(fte_match_param, spec->match_value, ++ misc_parameters.source_port); ++ return (port_mask & port_value & 0xffff) == MLX5_VPORT_UPLINK; ++} ++ + bool + mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec) + { +- u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria, +- misc_parameters.source_port); +- u32 port_value = MLX5_GET(fte_match_param, spec->match_value, +- misc_parameters.source_port); +- + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) + return false; + + /* push vlan on RX */ + return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && +- ((port_mask & port_value) == MLX5_VPORT_UPLINK); ++ mlx5_eswitch_offload_is_uplink_port(esw, spec); + } + + struct 
mlx5_flow_handle * diff --git a/queue-5.3/net-mlx5e-fix-ethtool-self-test-link-speed.patch b/queue-5.3/net-mlx5e-fix-ethtool-self-test-link-speed.patch new file mode 100644 index 00000000000..474c24be9fc --- /dev/null +++ b/queue-5.3/net-mlx5e-fix-ethtool-self-test-link-speed.patch @@ -0,0 +1,60 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Aya Levin +Date: Wed, 2 Oct 2019 16:53:21 +0300 +Subject: net/mlx5e: Fix ethtool self test: link speed + +From: Aya Levin + +[ Upstream commit 534e7366f41b0c689b01af4375aefcd1462adedf ] + +Ethtool self test contains a test for link speed. This test reads the +PTYS register and determines whether the current speed is valid or not. +Change current implementation to use the function mlx5e_port_linkspeed() +that does the same check and fails when speed is invalid. This code +redundancy led to a bug when mlx5e_port_linkspeed() was updated with +extended speeds and the self test was not. + +Fixes: 2c81bfd5ae56 ("net/mlx5e: Move port speed code from en_ethtool.c to en/port.c") +Signed-off-by: Aya Levin +Reviewed-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 15 +++------------ + 1 file changed, 3 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +@@ -35,6 +35,7 @@ + #include + #include + #include "en.h" ++#include "en/port.h" + + enum { + MLX5E_ST_LINK_STATE, +@@ -80,22 +81,12 @@ static int mlx5e_test_link_state(struct + + static int mlx5e_test_link_speed(struct mlx5e_priv *priv) + { +- u32 out[MLX5_ST_SZ_DW(ptys_reg)]; +- u32 eth_proto_oper; +- int i; ++ u32 speed; + + if (!netif_carrier_ok(priv->netdev)) + return 1; + +- if (mlx5_query_port_ptys(priv->mdev, out, sizeof(out), MLX5_PTYS_EN, 1)) +- return 1; +- +- eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); +- for (i = 0; i < MLX5E_LINK_MODES_NUMBER; i++) { +- 
if (eth_proto_oper & MLX5E_PROT_MASK(i)) +- return 0; +- } +- return 1; ++ return mlx5e_port_linkspeed(priv->mdev, &speed); + } + + struct mlx5ehdr { diff --git a/queue-5.3/net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch b/queue-5.3/net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch new file mode 100644 index 00000000000..500624184f9 --- /dev/null +++ b/queue-5.3/net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch @@ -0,0 +1,57 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Maxim Mikityanskiy +Date: Mon, 16 Sep 2019 14:54:20 +0300 +Subject: net/mlx5e: Fix handling of compressed CQEs in case of low NAPI budget + +From: Maxim Mikityanskiy + +[ Upstream commit 9df86bdb6746d7fcfc2fda715f7a7c3d0ddb2654 ] + +When CQE compression is enabled, compressed CQEs use the following +structure: a title is followed by one or many blocks, each containing 8 +mini CQEs (except the last, which may contain fewer mini CQEs). + +Due to NAPI budget restriction, a complete structure is not always +parsed in one NAPI run, and some blocks with mini CQEs may be deferred +to the next NAPI poll call - we have the mlx5e_decompress_cqes_cont call +in the beginning of mlx5e_poll_rx_cq. However, if the budget is +extremely low, some blocks may be left even after that, but the code +that follows the mlx5e_decompress_cqes_cont call doesn't check it and +assumes that a new CQE begins, which may not be the case. In such cases, +random memory corruptions occur. + +An extremely low NAPI budget of 8 is used when busy_poll or busy_read is +active. + +This commit adds a check to make sure that the previous compressed CQE +has been completely parsed after mlx5e_decompress_cqes_cont, otherwise +it prevents a new CQE from being fetched in the middle of a compressed +CQE. 
+ +This commit fixes random crashes in __build_skb, __page_pool_put_page +and other not-related-directly places, that used to happen when both CQE +compression and busy_poll/busy_read were enabled. + +Fixes: 7219ab34f184 ("net/mlx5e: CQE compression") +Signed-off-by: Maxim Mikityanskiy +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -1367,8 +1367,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + return 0; + +- if (rq->cqd.left) ++ if (rq->cqd.left) { + work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget); ++ if (rq->cqd.left || work_done >= budget) ++ goto out; ++ } + + cqe = mlx5_cqwq_get_cqe(cqwq); + if (!cqe) { diff --git a/queue-5.3/net-mlx5e-initialize-on-stack-link-modes-bitmap.patch b/queue-5.3/net-mlx5e-initialize-on-stack-link-modes-bitmap.patch new file mode 100644 index 00000000000..0744e2e683a --- /dev/null +++ b/queue-5.3/net-mlx5e-initialize-on-stack-link-modes-bitmap.patch @@ -0,0 +1,31 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Aya Levin +Date: Wed, 23 Oct 2019 12:57:54 +0300 +Subject: net/mlx5e: Initialize on stack link modes bitmap + +From: Aya Levin + +[ Upstream commit 926b37f76fb0a22fe93c8873c819fd167180e85c ] + +Initialize link modes bitmap on stack before using it, otherwise the +outcome of ethtool set link ksettings might have unexpected values. 
+ +Fixes: 4b95840a6ced ("net/mlx5e: Fix matching of speed to PRM link modes") +Signed-off-by: Aya Levin +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +@@ -1021,7 +1021,7 @@ static bool ext_link_mode_requested(cons + { + #define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT + int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT; +- __ETHTOOL_DECLARE_LINK_MODE_MASK(modes); ++ __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = {0,}; + + bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size); + return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/queue-5.3/net-mlx5e-remove-incorrect-match-criteria-assignment-line.patch b/queue-5.3/net-mlx5e-remove-incorrect-match-criteria-assignment-line.patch new file mode 100644 index 00000000000..2246b9d20d2 --- /dev/null +++ b/queue-5.3/net-mlx5e-remove-incorrect-match-criteria-assignment-line.patch @@ -0,0 +1,33 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Dmytro Linkin +Date: Thu, 29 Aug 2019 15:24:27 +0000 +Subject: net/mlx5e: Remove incorrect match criteria assignment line + +From: Dmytro Linkin + +[ Upstream commit 752d3dc06d6936d5a357a18b6b51d91c7e134e88 ] + +The driver has a function which enables match criteria for misc parameters +depending on eswitch capabilities. 
+ +Fixes: 4f5d1beadc10 ("Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux") +Signed-off-by: Dmytro Linkin +Reviewed-by: Jianbo Liu +Reviewed-by: Roi Dayan +Reviewed-by: Saeed Mahameed +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +@@ -285,7 +285,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_es + + mlx5_eswitch_set_rule_source_port(esw, spec, attr); + +- spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + if (attr->outer_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + diff --git a/queue-5.3/net-netem-correct-the-parent-s-backlog-when-corrupted-packet-was-dropped.patch b/queue-5.3/net-netem-correct-the-parent-s-backlog-when-corrupted-packet-was-dropped.patch new file mode 100644 index 00000000000..d51c15218b4 --- /dev/null +++ b/queue-5.3/net-netem-correct-the-parent-s-backlog-when-corrupted-packet-was-dropped.patch @@ -0,0 +1,34 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Jakub Kicinski +Date: Fri, 18 Oct 2019 09:16:58 -0700 +Subject: net: netem: correct the parent's backlog when corrupted packet was dropped + +From: Jakub Kicinski + +[ Upstream commit e0ad032e144731a5928f2d75e91c2064ba1a764c ] + +If packet corruption failed we jump to finish_segs and return +NET_XMIT_SUCCESS. Seeing success will make the parent qdisc +increment its backlog, that's incorrect - we need to return +NET_XMIT_DROP. + +Fixes: 6071bd1aa13e ("netem: Segment GSO packets on enqueue") +Signed-off-by: Jakub Kicinski +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_netem.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -616,6 +616,8 @@ finish_segs: + } + /* Parent qdiscs accounted for 1 skb of size @prev_len */ + qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len)); ++ } else if (!skb) { ++ return NET_XMIT_DROP; + } + return NET_XMIT_SUCCESS; + } diff --git a/queue-5.3/net-netem-fix-error-path-for-corrupted-gso-frames.patch b/queue-5.3/net-netem-fix-error-path-for-corrupted-gso-frames.patch new file mode 100644 index 00000000000..3c01fe41cce --- /dev/null +++ b/queue-5.3/net-netem-fix-error-path-for-corrupted-gso-frames.patch @@ -0,0 +1,69 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Jakub Kicinski +Date: Fri, 18 Oct 2019 09:16:57 -0700 +Subject: net: netem: fix error path for corrupted GSO frames + +From: Jakub Kicinski + +[ Upstream commit a7fa12d15855904aff1716e1fc723c03ba38c5cc ] + +To corrupt a GSO frame we first perform segmentation. We then +proceed using the first segment instead of the full GSO skb and +requeue the rest of the segments as separate packets. + +If there are any issues with processing the first segment we +still want to process the rest, therefore we jump to the +finish_segs label. + +Commit 177b8007463c ("net: netem: fix backlog accounting for +corrupted GSO frames") started using the pointer to the first +segment in the "rest of segments processing", but as mentioned +above the first segment may have already been freed at this point. + +Backlog corrections for parent qdiscs have to be adjusted. + +Fixes: 177b8007463c ("net: netem: fix backlog accounting for corrupted GSO frames") +Reported-by: kbuild test robot +Reported-by: Dan Carpenter +Reported-by: Ben Hutchings +Signed-off-by: Jakub Kicinski +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_netem.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -509,6 +509,7 @@ static int netem_enqueue(struct sk_buff + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb)) { + qdisc_drop(skb, sch, to_free); ++ skb = NULL; + goto finish_segs; + } + +@@ -593,9 +594,10 @@ static int netem_enqueue(struct sk_buff + finish_segs: + if (segs) { + unsigned int len, last_len; +- int nb = 0; ++ int nb; + +- len = skb->len; ++ len = skb ? skb->len : 0; ++ nb = skb ? 1 : 0; + + while (segs) { + skb2 = segs->next; +@@ -612,7 +614,8 @@ finish_segs: + } + segs = skb2; + } +- qdisc_tree_reduce_backlog(sch, -nb, prev_len - len); ++ /* Parent qdiscs accounted for 1 skb of size @prev_len */ ++ qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len)); + } + return NET_XMIT_SUCCESS; + } diff --git a/queue-5.3/net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch b/queue-5.3/net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch new file mode 100644 index 00000000000..c09de051ced --- /dev/null +++ b/queue-5.3/net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch @@ -0,0 +1,35 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Doug Berger +Date: Wed, 16 Oct 2019 16:06:30 -0700 +Subject: net: phy: bcm7xxx: define soft_reset for 40nm EPHY + +From: Doug Berger + +[ Upstream commit fe586b823372a9f43f90e2c6aa0573992ce7ccb7 ] + +The internal 40nm EPHYs use a "Workaround for putting the PHY in +IDDQ mode." These PHYs require a soft reset to restore functionality +after they are powered back up. + +This commit defines the soft_reset function to use genphy_soft_reset +during phy_init_hw to accommodate this. + +Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset") +Signed-off-by: Doug Berger +Acked-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/bcm7xxx.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/phy/bcm7xxx.c ++++ b/drivers/net/phy/bcm7xxx.c +@@ -572,6 +572,7 @@ static int bcm7xxx_28nm_probe(struct phy + .name = _name, \ + /* PHY_BASIC_FEATURES */ \ + .flags = PHY_IS_INTERNAL, \ ++ .soft_reset = genphy_soft_reset, \ + .config_init = bcm7xxx_config_init, \ + .suspend = bcm7xxx_suspend, \ + .resume = bcm7xxx_config_init, \ diff --git a/queue-5.3/net-phylink-fix-phylink_dbg-macro.patch b/queue-5.3/net-phylink-fix-phylink_dbg-macro.patch new file mode 100644 index 00000000000..cab2f97b7f5 --- /dev/null +++ b/queue-5.3/net-phylink-fix-phylink_dbg-macro.patch @@ -0,0 +1,49 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Florian Fainelli +Date: Thu, 31 Oct 2019 15:42:26 -0700 +Subject: net: phylink: Fix phylink_dbg() macro + +From: Florian Fainelli + +[ Upstream commit 9d68db5092c5fac99fccfdeab3f04df0b27d1762 ] + +The phylink_dbg() macro does not follow dynamic debug or defined(DEBUG) +and as a result, it spams the kernel log since a PR_DEBUG level is +currently used. Fix it to be defined appropriately whether +CONFIG_DYNAMIC_DEBUG or defined(DEBUG) are set. + +Fixes: 17091180b152 ("net: phylink: Add phylink_{printk, err, warn, info, dbg} macros") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phylink.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +--- a/drivers/net/phy/phylink.c ++++ b/drivers/net/phy/phylink.c +@@ -87,8 +87,24 @@ struct phylink { + phylink_printk(KERN_WARNING, pl, fmt, ##__VA_ARGS__) + #define phylink_info(pl, fmt, ...) \ + phylink_printk(KERN_INFO, pl, fmt, ##__VA_ARGS__) ++#if defined(CONFIG_DYNAMIC_DEBUG) + #define phylink_dbg(pl, fmt, ...) 
\ ++do { \ ++ if ((pl)->config->type == PHYLINK_NETDEV) \ ++ netdev_dbg((pl)->netdev, fmt, ##__VA_ARGS__); \ ++ else if ((pl)->config->type == PHYLINK_DEV) \ ++ dev_dbg((pl)->dev, fmt, ##__VA_ARGS__); \ ++} while (0) ++#elif defined(DEBUG) ++#define phylink_dbg(pl, fmt, ...) \ + phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__) ++#else ++#define phylink_dbg(pl, fmt, ...) \ ++({ \ ++ if (0) \ ++ phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__); \ ++}) ++#endif + + /** + * phylink_set_port_modes() - set the port type modes in the ethtool mask diff --git a/queue-5.3/net-reorder-struct-net-fields-to-avoid-false-sharing.patch b/queue-5.3/net-reorder-struct-net-fields-to-avoid-false-sharing.patch new file mode 100644 index 00000000000..cd6acedf945 --- /dev/null +++ b/queue-5.3/net-reorder-struct-net-fields-to-avoid-false-sharing.patch @@ -0,0 +1,113 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Fri, 18 Oct 2019 15:20:05 -0700 +Subject: net: reorder 'struct net' fields to avoid false sharing + +From: Eric Dumazet + +[ Upstream commit 2a06b8982f8f2f40d03a3daf634676386bd84dbc ] + +Intel test robot reported a ~7% regression on TCP_CRR tests +that they bisected to the cited commit. + +Indeed, every time a new TCP socket is created or deleted, +the atomic counter net->count is touched (via get_net(net) +and put_net(net) calls) + +So cpus might have to reload a contended cache line in +net_hash_mix(net) calls. + +We need to reorder 'struct net' fields to move @hash_mix +in a read mostly cache line. + +We move in the first cache line fields that can be +dirtied often. + +We probably will have to address in a followup patch +the __randomize_layout that was added in linux-4.13, +since this might break our placement choices. + +Fixes: 355b98553789 ("netns: provide pure entropy for net_hash_mix()") +Signed-off-by: Eric Dumazet +Reported-by: kernel test robot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/net_namespace.h | 25 +++++++++++++++++-------- + 1 file changed, 17 insertions(+), 8 deletions(-) + +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -52,6 +52,9 @@ struct bpf_prog; + #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) + + struct net { ++ /* First cache line can be often dirtied. ++ * Do not place here read-mostly fields. ++ */ + refcount_t passive; /* To decide when the network + * namespace should be freed. + */ +@@ -60,7 +63,13 @@ struct net { + */ + spinlock_t rules_mod_lock; + +- u32 hash_mix; ++ unsigned int dev_unreg_count; ++ ++ unsigned int dev_base_seq; /* protected by rtnl_mutex */ ++ int ifindex; ++ ++ spinlock_t nsid_lock; ++ atomic_t fnhe_genid; + + struct list_head list; /* list of network namespaces */ + struct list_head exit_list; /* To linked to call pernet exit +@@ -76,11 +85,11 @@ struct net { + #endif + struct user_namespace *user_ns; /* Owning user namespace */ + struct ucounts *ucounts; +- spinlock_t nsid_lock; + struct idr netns_ids; + + struct ns_common ns; + ++ struct list_head dev_base_head; + struct proc_dir_entry *proc_net; + struct proc_dir_entry *proc_net_stat; + +@@ -93,12 +102,14 @@ struct net { + + struct uevent_sock *uevent_sock; /* uevent socket */ + +- struct list_head dev_base_head; + struct hlist_head *dev_name_head; + struct hlist_head *dev_index_head; +- unsigned int dev_base_seq; /* protected by rtnl_mutex */ +- int ifindex; +- unsigned int dev_unreg_count; ++ /* Note that @hash_mix can be read millions times per second, ++ * it is critical that it is on a read_mostly cache line. 
++ */ ++ u32 hash_mix; ++ ++ struct net_device *loopback_dev; /* The loopback */ + + /* core fib_rules */ + struct list_head rules_ops; +@@ -106,7 +117,6 @@ struct net { + struct list_head fib_notifier_ops; /* Populated by + * register_pernet_subsys() + */ +- struct net_device *loopback_dev; /* The loopback */ + struct netns_core core; + struct netns_mib mib; + struct netns_packet packet; +@@ -171,7 +181,6 @@ struct net { + struct netns_xdp xdp; + #endif + struct sock *diag_nlsk; +- atomic_t fnhe_genid; + } __randomize_layout; + + #include diff --git a/queue-5.3/net-rtnetlink-fix-a-typo-fbd-fdb.patch b/queue-5.3/net-rtnetlink-fix-a-typo-fbd-fdb.patch new file mode 100644 index 00000000000..e6e78761bb5 --- /dev/null +++ b/queue-5.3/net-rtnetlink-fix-a-typo-fbd-fdb.patch @@ -0,0 +1,32 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Nikolay Aleksandrov +Date: Tue, 29 Oct 2019 13:59:32 +0200 +Subject: net: rtnetlink: fix a typo fbd -> fdb + +From: Nikolay Aleksandrov + +[ Upstream commit 8b73018fe44521c1cf59d7bac53624c87d3f10e2 ] + +A simple typo fix in the nl error message (fbd -> fdb). + +CC: David Ahern +Fixes: 8c6e137fbc7f ("rtnetlink: Update rtnl_fdb_dump for strict data checking") +Signed-off-by: Nikolay Aleksandrov +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -3916,7 +3916,7 @@ static int valid_fdb_dump_strict(const s + ndm = nlmsg_data(nlh); + if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || + ndm->ndm_flags || ndm->ndm_type) { +- NL_SET_ERR_MSG(extack, "Invalid values in header for fbd dump request"); ++ NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request"); + return -EINVAL; + } + diff --git a/queue-5.3/net-smc-fix-closing-of-fallback-smc-sockets.patch b/queue-5.3/net-smc-fix-closing-of-fallback-smc-sockets.patch new file mode 100644 index 00000000000..36226199e0a --- /dev/null +++ b/queue-5.3/net-smc-fix-closing-of-fallback-smc-sockets.patch @@ -0,0 +1,52 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Ursula Braun +Date: Wed, 23 Oct 2019 15:44:05 +0200 +Subject: net/smc: fix closing of fallback SMC sockets + +From: Ursula Braun + +[ Upstream commit f536dffc0b79738c3104af999318279dccbaa261 ] + +For SMC sockets forced to fallback to TCP, the file is propagated +from the outer SMC to the internal TCP socket. When closing the SMC +socket, the internal TCP socket file pointer must be restored to the +original NULL value, otherwise memory leaks may show up (found with +CONFIG_DEBUG_KMEMLEAK). + +The internal TCP socket is released in smc_clcsock_release(), which +calls __sock_release() function in net/socket.c. This calls the +needed iput(SOCK_INODE(sock)) only, if the file pointer has been reset +to the original NULL-value. + +Fixes: 07603b230895 ("net/smc: propagate file from SMC to TCP socket") +Signed-off-by: Ursula Braun +Signed-off-by: Karsten Graul +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/af_smc.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -123,6 +123,12 @@ struct proto smc_proto6 = { + }; + EXPORT_SYMBOL_GPL(smc_proto6); + ++static void smc_restore_fallback_changes(struct smc_sock *smc) ++{ ++ smc->clcsock->file->private_data = smc->sk.sk_socket; ++ smc->clcsock->file = NULL; ++} ++ + static int __smc_release(struct smc_sock *smc) + { + struct sock *sk = &smc->sk; +@@ -141,6 +147,7 @@ static int __smc_release(struct smc_sock + } + sk->sk_state = SMC_CLOSED; + sk->sk_state_change(sk); ++ smc_restore_fallback_changes(smc); + } + + sk->sk_prot->unhash(sk); diff --git a/queue-5.3/net-smc-fix-refcounting-for-non-blocking-connect.patch b/queue-5.3/net-smc-fix-refcounting-for-non-blocking-connect.patch new file mode 100644 index 00000000000..2d74978ff8a --- /dev/null +++ b/queue-5.3/net-smc-fix-refcounting-for-non-blocking-connect.patch @@ -0,0 +1,45 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Ursula Braun +Date: Tue, 29 Oct 2019 12:41:26 +0100 +Subject: net/smc: fix refcounting for non-blocking connect() + +From: Ursula Braun + +[ Upstream commit 301428ea3708188dc4a243e6e6b46c03b46a0fbc ] + +If a nonblocking socket is immediately closed after connect(), +the connect worker may not have started. This results in a refcount +problem, since sock_hold() is called from the connect worker. +This patch moves the sock_hold in front of the connect worker +scheduling. + +Reported-by: syzbot+4c063e6dea39e4b79f29@syzkaller.appspotmail.com +Fixes: 50717a37db03 ("net/smc: nonblocking connect rework") +Reviewed-by: Karsten Graul +Signed-off-by: Ursula Braun +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/af_smc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -707,8 +707,6 @@ static int __smc_connect(struct smc_sock + int smc_type; + int rc = 0; + +- sock_hold(&smc->sk); /* sock put in passive closing */ +- + if (smc->use_fallback) + return smc_connect_fallback(smc, smc->fallback_rsn); + +@@ -853,6 +851,8 @@ static int smc_connect(struct socket *so + rc = kernel_connect(smc->clcsock, addr, alen, flags); + if (rc && rc != -EINPROGRESS) + goto out; ++ ++ sock_hold(&smc->sk); /* sock put in passive closing */ + if (flags & O_NONBLOCK) { + if (schedule_work(&smc->connect_work)) + smc->connect_nonblock = 1; diff --git a/queue-5.3/net-smc-keep-vlan_id-for-smc-r-in-smc_listen_work.patch b/queue-5.3/net-smc-keep-vlan_id-for-smc-r-in-smc_listen_work.patch new file mode 100644 index 00000000000..d883dbfbd60 --- /dev/null +++ b/queue-5.3/net-smc-keep-vlan_id-for-smc-r-in-smc_listen_work.patch @@ -0,0 +1,37 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Ursula Braun +Date: Wed, 23 Oct 2019 15:44:06 +0200 +Subject: net/smc: keep vlan_id for SMC-R in smc_listen_work() + +From: Ursula Braun + +[ Upstream commit ca5f8d2dd5229ccacdd5cfde1ce4d32b0810e454 ] + +Creating of an SMC-R connection with vlan-id fails, because +smc_listen_work() determines the vlan_id of the connection, +saves it in struct smc_init_info ini, but clears the ini area +again if SMC-D is not applicable. +This patch just resets the ISM device before investigating +SMC-R availability. + +Fixes: bc36d2fc93eb ("net/smc: consolidate function parameters") +Signed-off-by: Ursula Braun +Signed-off-by: Karsten Graul +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/af_smc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -1298,8 +1298,8 @@ static void smc_listen_work(struct work_ + /* check if RDMA is available */ + if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */ + /* prepare RDMA check */ +- memset(&ini, 0, sizeof(ini)); + ini.is_smcd = false; ++ ini.ism_dev = NULL; + ini.ib_lcl = &pclc->lcl; + rc = smc_find_rdma_device(new_smc, &ini); + if (rc) { diff --git a/queue-5.3/net-usb-lan78xx-connect-phy-before-registering-mac.patch b/queue-5.3/net-usb-lan78xx-connect-phy-before-registering-mac.patch new file mode 100644 index 00000000000..a9b0cdbcd97 --- /dev/null +++ b/queue-5.3/net-usb-lan78xx-connect-phy-before-registering-mac.patch @@ -0,0 +1,60 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Andrew Lunn +Date: Thu, 17 Oct 2019 21:29:26 +0200 +Subject: net: usb: lan78xx: Connect PHY before registering MAC + +From: Andrew Lunn + +[ Upstream commit 38b4fe320119859c11b1dc06f6b4987a16344fa1 ] + +As soon as the netdev is registered, the kernel can start using the +interface. If the driver connects the MAC to the PHY after the netdev +is registered, there is a race condition where the interface can be +opened without having the PHY connected. + +Change the order to close this race condition. + +Fixes: 92571a1aae40 ("lan78xx: Connect phy early") +Reported-by: Daniel Wagner +Signed-off-by: Andrew Lunn +Tested-by: Daniel Wagner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/lan78xx.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -3792,10 +3792,14 @@ static int lan78xx_probe(struct usb_inte + /* driver requires remote-wakeup capability during autosuspend.
*/ + intf->needs_remote_wakeup = 1; + ++ ret = lan78xx_phy_init(dev); ++ if (ret < 0) ++ goto out4; ++ + ret = register_netdev(netdev); + if (ret != 0) { + netif_err(dev, probe, netdev, "couldn't register the device\n"); +- goto out4; ++ goto out5; + } + + usb_set_intfdata(intf, dev); +@@ -3808,14 +3812,10 @@ static int lan78xx_probe(struct usb_inte + pm_runtime_set_autosuspend_delay(&udev->dev, + DEFAULT_AUTOSUSPEND_DELAY); + +- ret = lan78xx_phy_init(dev); +- if (ret < 0) +- goto out5; +- + return 0; + + out5: +- unregister_netdev(netdev); ++ phy_disconnect(netdev->phydev); + out4: + usb_free_urb(dev->urb_intr); + out3: diff --git a/queue-5.3/net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch b/queue-5.3/net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch new file mode 100644 index 00000000000..9ae5723bf17 --- /dev/null +++ b/queue-5.3/net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch @@ -0,0 +1,92 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Daniel Wagner +Date: Fri, 25 Oct 2019 10:04:13 +0200 +Subject: net: usb: lan78xx: Disable interrupts before calling generic_handle_irq() + +From: Daniel Wagner + +[ Upstream commit 0a29ac5bd3a988dc151c8d26910dec2557421f64 ] + +lan78xx_status() will run with interrupts enabled due to the change in +ed194d136769 ("usb: core: remove local_irq_save() around ->complete() +handler"). generic_handle_irq() expects to be run with IRQs disabled. 
+ +[ 4.886203] 000: irq 79 handler irq_default_primary_handler+0x0/0x8 enabled interrupts +[ 4.886243] 000: WARNING: CPU: 0 PID: 0 at kernel/irq/handle.c:152 __handle_irq_event_percpu+0x154/0x168 +[ 4.896294] 000: Modules linked in: +[ 4.896301] 000: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.6 #39 +[ 4.896310] 000: Hardware name: Raspberry Pi 3 Model B+ (DT) +[ 4.896315] 000: pstate: 60000005 (nZCv daif -PAN -UAO) +[ 4.896321] 000: pc : __handle_irq_event_percpu+0x154/0x168 +[ 4.896331] 000: lr : __handle_irq_event_percpu+0x154/0x168 +[ 4.896339] 000: sp : ffff000010003cc0 +[ 4.896346] 000: x29: ffff000010003cc0 x28: 0000000000000060 +[ 4.896355] 000: x27: ffff000011021980 x26: ffff00001189c72b +[ 4.896364] 000: x25: ffff000011702bc0 x24: ffff800036d6e400 +[ 4.896373] 000: x23: 000000000000004f x22: ffff000010003d64 +[ 4.896381] 000: x21: 0000000000000000 x20: 0000000000000002 +[ 4.896390] 000: x19: ffff8000371c8480 x18: 0000000000000060 +[ 4.896398] 000: x17: 0000000000000000 x16: 00000000000000eb +[ 4.896406] 000: x15: ffff000011712d18 x14: 7265746e69206465 +[ 4.896414] 000: x13: ffff000010003ba0 x12: ffff000011712df0 +[ 4.896422] 000: x11: 0000000000000001 x10: ffff000011712e08 +[ 4.896430] 000: x9 : 0000000000000001 x8 : 000000000003c920 +[ 4.896437] 000: x7 : ffff0000118cc410 x6 : ffff0000118c7f00 +[ 4.896445] 000: x5 : 000000000003c920 x4 : 0000000000004510 +[ 4.896453] 000: x3 : ffff000011712dc8 x2 : 0000000000000000 +[ 4.896461] 000: x1 : 73a3f67df94c1500 x0 : 0000000000000000 +[ 4.896466] 000: Call trace: +[ 4.896471] 000: __handle_irq_event_percpu+0x154/0x168 +[ 4.896481] 000: handle_irq_event_percpu+0x50/0xb0 +[ 4.896489] 000: handle_irq_event+0x40/0x98 +[ 4.896497] 000: handle_simple_irq+0xa4/0xf0 +[ 4.896505] 000: generic_handle_irq+0x24/0x38 +[ 4.896513] 000: intr_complete+0xb0/0xe0 +[ 4.896525] 000: __usb_hcd_giveback_urb+0x58/0xd8 +[ 4.896533] 000: usb_giveback_urb_bh+0xd0/0x170 +[ 4.896539] 000: tasklet_action_common.isra.0+0x9c/0x128 +[ 
4.896549] 000: tasklet_hi_action+0x24/0x30 +[ 4.896556] 000: __do_softirq+0x120/0x23c +[ 4.896564] 000: irq_exit+0xb8/0xd8 +[ 4.896571] 000: __handle_domain_irq+0x64/0xb8 +[ 4.896579] 000: bcm2836_arm_irqchip_handle_irq+0x60/0xc0 +[ 4.896586] 000: el1_irq+0xb8/0x140 +[ 4.896592] 000: arch_cpu_idle+0x10/0x18 +[ 4.896601] 000: do_idle+0x200/0x280 +[ 4.896608] 000: cpu_startup_entry+0x20/0x28 +[ 4.896615] 000: rest_init+0xb4/0xc0 +[ 4.896623] 000: arch_call_rest_init+0xc/0x14 +[ 4.896632] 000: start_kernel+0x454/0x480 + +Fixes: ed194d136769 ("usb: core: remove local_irq_save() around ->complete() handler") +Cc: Woojung Huh +Cc: Marc Zyngier +Cc: Andrew Lunn +Cc: Stefan Wahren +Cc: Jisheng Zhang +Cc: Sebastian Andrzej Siewior +Cc: Thomas Gleixner +Cc: David Miller +Signed-off-by: Daniel Wagner +Tested-by: Stefan Wahren +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/lan78xx.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -1265,8 +1265,11 @@ static void lan78xx_status(struct lan78x + netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata); + lan78xx_defer_kevent(dev, EVENT_LINK_RESET); + +- if (dev->domain_data.phyirq > 0) ++ if (dev->domain_data.phyirq > 0) { ++ local_irq_disable(); + generic_handle_irq(dev->domain_data.phyirq); ++ local_irq_enable(); ++ } + } else + netdev_warn(dev->net, + "unexpected interrupt: 0x%08x\n", intdata); diff --git a/queue-5.3/net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch b/queue-5.3/net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch new file mode 100644 index 00000000000..bf4e04e9124 --- /dev/null +++ b/queue-5.3/net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch @@ -0,0 +1,81 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Wed, 23 Oct 2019 22:44:51 -0700 +Subject: net: use skb_queue_empty_lockless() in busy poll contexts + +From: Eric Dumazet + 
+[ Upstream commit 3f926af3f4d688e2e11e7f8ed04e277a14d4d4a4 ] + +Busy polling usually runs without locks. +Let's use skb_queue_empty_lockless() instead of skb_queue_empty() + +Also uses READ_ONCE() in __skb_try_recv_datagram() to address +a similar potential problem. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/crypto/chelsio/chtls/chtls_io.c | 2 +- + net/core/datagram.c | 2 +- + net/core/sock.c | 2 +- + net/ipv4/tcp.c | 2 +- + net/sctp/socket.c | 2 +- + 5 files changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/crypto/chelsio/chtls/chtls_io.c ++++ b/drivers/crypto/chelsio/chtls/chtls_io.c +@@ -1701,7 +1701,7 @@ int chtls_recvmsg(struct sock *sk, struc + return peekmsg(sk, msg, len, nonblock, flags); + + if (sk_can_busy_loop(sk) && +- skb_queue_empty(&sk->sk_receive_queue) && ++ skb_queue_empty_lockless(&sk->sk_receive_queue) && + sk->sk_state == TCP_ESTABLISHED) + sk_busy_loop(sk, nonblock); + +--- a/net/core/datagram.c ++++ b/net/core/datagram.c +@@ -278,7 +278,7 @@ struct sk_buff *__skb_try_recv_datagram( + break; + + sk_busy_loop(sk, flags & MSG_DONTWAIT); +- } while (sk->sk_receive_queue.prev != *last); ++ } while (READ_ONCE(sk->sk_receive_queue.prev) != *last); + + error = -EAGAIN; + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -3593,7 +3593,7 @@ bool sk_busy_loop_end(void *p, unsigned + { + struct sock *sk = p; + +- return !skb_queue_empty(&sk->sk_receive_queue) || ++ return !skb_queue_empty_lockless(&sk->sk_receive_queue) || + sk_busy_loop_timeout(sk, start_time); + } + EXPORT_SYMBOL(sk_busy_loop_end); +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1961,7 +1961,7 @@ int tcp_recvmsg(struct sock *sk, struct + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + +- if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && ++ if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && + (sk->sk_state == 
TCP_ESTABLISHED)) + sk_busy_loop(sk, nonblock); + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -8724,7 +8724,7 @@ struct sk_buff *sctp_skb_recv_datagram(s + if (sk_can_busy_loop(sk)) { + sk_busy_loop(sk, noblock); + +- if (!skb_queue_empty(&sk->sk_receive_queue)) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + continue; + } + diff --git a/queue-5.3/net-use-skb_queue_empty_lockless-in-poll-handlers.patch b/queue-5.3/net-use-skb_queue_empty_lockless-in-poll-handlers.patch new file mode 100644 index 00000000000..59f4fbdc756 --- /dev/null +++ b/queue-5.3/net-use-skb_queue_empty_lockless-in-poll-handlers.patch @@ -0,0 +1,248 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Wed, 23 Oct 2019 22:44:50 -0700 +Subject: net: use skb_queue_empty_lockless() in poll() handlers + +From: Eric Dumazet + +[ Upstream commit 3ef7cf57c72f32f61e97f8fa401bc39ea1f1a5d4 ] + +Many poll() handlers are lockless. Using skb_queue_empty_lockless() +instead of skb_queue_empty() is more appropriate. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/capi/capi.c | 2 +- + net/atm/common.c | 2 +- + net/bluetooth/af_bluetooth.c | 4 ++-- + net/caif/caif_socket.c | 2 +- + net/core/datagram.c | 4 ++-- + net/decnet/af_decnet.c | 2 +- + net/ipv4/tcp.c | 2 +- + net/ipv4/udp.c | 2 +- + net/nfc/llcp_sock.c | 4 ++-- + net/phonet/socket.c | 4 ++-- + net/sctp/socket.c | 4 ++-- + net/tipc/socket.c | 4 ++-- + net/unix/af_unix.c | 6 +++--- + net/vmw_vsock/af_vsock.c | 2 +- + 14 files changed, 22 insertions(+), 22 deletions(-) + +--- a/drivers/isdn/capi/capi.c ++++ b/drivers/isdn/capi/capi.c +@@ -744,7 +744,7 @@ capi_poll(struct file *file, poll_table + + poll_wait(file, &(cdev->recvwait), wait); + mask = EPOLLOUT | EPOLLWRNORM; +- if (!skb_queue_empty(&cdev->recvqueue)) ++ if (!skb_queue_empty_lockless(&cdev->recvqueue)) + mask |= EPOLLIN | EPOLLRDNORM; + return mask; + } +--- a/net/atm/common.c ++++ b/net/atm/common.c +@@ -668,7 +668,7 @@ __poll_t vcc_poll(struct file *file, str + mask |= EPOLLHUP; + + /* readable? */ +- if (!skb_queue_empty(&sk->sk_receive_queue)) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + mask |= EPOLLIN | EPOLLRDNORM; + + /* writable? */ +--- a/net/bluetooth/af_bluetooth.c ++++ b/net/bluetooth/af_bluetooth.c +@@ -460,7 +460,7 @@ __poll_t bt_sock_poll(struct file *file, + if (sk->sk_state == BT_LISTEN) + return bt_accept_poll(sk); + +- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) ++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
EPOLLPRI : 0); + +@@ -470,7 +470,7 @@ __poll_t bt_sock_poll(struct file *file, + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= EPOLLHUP; + +- if (!skb_queue_empty(&sk->sk_receive_queue)) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + mask |= EPOLLIN | EPOLLRDNORM; + + if (sk->sk_state == BT_CLOSED) +--- a/net/caif/caif_socket.c ++++ b/net/caif/caif_socket.c +@@ -953,7 +953,7 @@ static __poll_t caif_poll(struct file *f + mask |= EPOLLRDHUP; + + /* readable? */ +- if (!skb_queue_empty(&sk->sk_receive_queue) || ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) + mask |= EPOLLIN | EPOLLRDNORM; + +--- a/net/core/datagram.c ++++ b/net/core/datagram.c +@@ -767,7 +767,7 @@ __poll_t datagram_poll(struct file *file + mask = 0; + + /* exceptional events? */ +- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) ++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); + +@@ -777,7 +777,7 @@ __poll_t datagram_poll(struct file *file + mask |= EPOLLHUP; + + /* readable? 
*/ +- if (!skb_queue_empty(&sk->sk_receive_queue)) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + mask |= EPOLLIN | EPOLLRDNORM; + + /* Connection-based need to check for termination and startup */ +--- a/net/decnet/af_decnet.c ++++ b/net/decnet/af_decnet.c +@@ -1205,7 +1205,7 @@ static __poll_t dn_poll(struct file *fil + struct dn_scp *scp = DN_SK(sk); + __poll_t mask = datagram_poll(file, sock, wait); + +- if (!skb_queue_empty(&scp->other_receive_queue)) ++ if (!skb_queue_empty_lockless(&scp->other_receive_queue)) + mask |= EPOLLRDBAND; + + return mask; +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -584,7 +584,7 @@ __poll_t tcp_poll(struct file *file, str + } + /* This barrier is coupled with smp_wmb() in tcp_reset() */ + smp_rmb(); +- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) ++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + mask |= EPOLLERR; + + return mask; +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -2723,7 +2723,7 @@ __poll_t udp_poll(struct file *file, str + __poll_t mask = datagram_poll(file, sock, wait); + struct sock *sk = sock->sk; + +- if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) ++ if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) + mask |= EPOLLIN | EPOLLRDNORM; + + /* Check for false positives due to checksum errors */ +--- a/net/nfc/llcp_sock.c ++++ b/net/nfc/llcp_sock.c +@@ -554,11 +554,11 @@ static __poll_t llcp_sock_poll(struct fi + if (sk->sk_state == LLCP_LISTEN) + return llcp_accept_poll(sk); + +- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) ++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
EPOLLPRI : 0); + +- if (!skb_queue_empty(&sk->sk_receive_queue)) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + mask |= EPOLLIN | EPOLLRDNORM; + + if (sk->sk_state == LLCP_CLOSED) +--- a/net/phonet/socket.c ++++ b/net/phonet/socket.c +@@ -338,9 +338,9 @@ static __poll_t pn_socket_poll(struct fi + + if (sk->sk_state == TCP_CLOSE) + return EPOLLERR; +- if (!skb_queue_empty(&sk->sk_receive_queue)) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + mask |= EPOLLIN | EPOLLRDNORM; +- if (!skb_queue_empty(&pn->ctrlreq_queue)) ++ if (!skb_queue_empty_lockless(&pn->ctrlreq_queue)) + mask |= EPOLLPRI; + if (!mask && sk->sk_state == TCP_CLOSE_WAIT) + return EPOLLHUP; +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -8329,7 +8329,7 @@ __poll_t sctp_poll(struct file *file, st + mask = 0; + + /* Is there any exceptional events? */ +- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) ++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); + if (sk->sk_shutdown & RCV_SHUTDOWN) +@@ -8338,7 +8338,7 @@ __poll_t sctp_poll(struct file *file, st + mask |= EPOLLHUP; + + /* Is it readable? Reconsider this code with TCP-style support. */ +- if (!skb_queue_empty(&sk->sk_receive_queue)) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + mask |= EPOLLIN | EPOLLRDNORM; + + /* The association is either gone or not ready. 
*/ +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -740,7 +740,7 @@ static __poll_t tipc_poll(struct file *f + /* fall through */ + case TIPC_LISTEN: + case TIPC_CONNECTING: +- if (!skb_queue_empty(&sk->sk_receive_queue)) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + revents |= EPOLLIN | EPOLLRDNORM; + break; + case TIPC_OPEN: +@@ -748,7 +748,7 @@ static __poll_t tipc_poll(struct file *f + revents |= EPOLLOUT; + if (!tipc_sk_type_connectionless(sk)) + break; +- if (skb_queue_empty(&sk->sk_receive_queue)) ++ if (skb_queue_empty_lockless(&sk->sk_receive_queue)) + break; + revents |= EPOLLIN | EPOLLRDNORM; + break; +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2599,7 +2599,7 @@ static __poll_t unix_poll(struct file *f + mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; + + /* readable? */ +- if (!skb_queue_empty(&sk->sk_receive_queue)) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + mask |= EPOLLIN | EPOLLRDNORM; + + /* Connection-based need to check for termination and startup */ +@@ -2628,7 +2628,7 @@ static __poll_t unix_dgram_poll(struct f + mask = 0; + + /* exceptional events? */ +- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) ++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); + +@@ -2638,7 +2638,7 @@ static __poll_t unix_dgram_poll(struct f + mask |= EPOLLHUP; + + /* readable? */ +- if (!skb_queue_empty(&sk->sk_receive_queue)) ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + mask |= EPOLLIN | EPOLLRDNORM; + + /* Connection-based need to check for termination and startup */ +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -870,7 +870,7 @@ static __poll_t vsock_poll(struct file * + * the queue and write as long as the socket isn't shutdown for + * sending. 
+ */ +- if (!skb_queue_empty(&sk->sk_receive_queue) || ++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { + mask |= EPOLLIN | EPOLLRDNORM; + } diff --git a/queue-5.3/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch b/queue-5.3/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch new file mode 100644 index 00000000000..a247d6df9a0 --- /dev/null +++ b/queue-5.3/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch @@ -0,0 +1,36 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: zhanglin +Date: Sat, 26 Oct 2019 15:54:16 +0800 +Subject: net: Zeroing the structure ethtool_wolinfo in ethtool_get_wol() + +From: zhanglin + +[ Upstream commit 5ff223e86f5addbfae26419cbb5d61d98f6fbf7d ] + +memset() the structure ethtool_wolinfo that has padded bytes +but the padded bytes have not been zeroed out. + +Signed-off-by: zhanglin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/ethtool.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/core/ethtool.c ++++ b/net/core/ethtool.c +@@ -1395,11 +1395,13 @@ static int ethtool_reset(struct net_devi + + static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) + { +- struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; ++ struct ethtool_wolinfo wol; + + if (!dev->ethtool_ops->get_wol) + return -EOPNOTSUPP; + ++ memset(&wol, 0, sizeof(struct ethtool_wolinfo)); ++ wol.cmd = ETHTOOL_GWOL; + dev->ethtool_ops->get_wol(dev, &wol); + + if (copy_to_user(useraddr, &wol, sizeof(wol))) diff --git a/queue-5.3/netns-fix-gfp-flags-in-rtnl_net_notifyid.patch b/queue-5.3/netns-fix-gfp-flags-in-rtnl_net_notifyid.patch new file mode 100644 index 00000000000..ee349424130 --- /dev/null +++ b/queue-5.3/netns-fix-gfp-flags-in-rtnl_net_notifyid.patch @@ -0,0 +1,286 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Guillaume Nault +Date: Wed, 23 Oct 2019 18:39:04 +0200 +Subject: netns: fix GFP 
flags in rtnl_net_notifyid() + +From: Guillaume Nault + +[ Upstream commit d4e4fdf9e4a27c87edb79b1478955075be141f67 ] + +In rtnl_net_notifyid(), we certainly can't pass a null GFP flag to +rtnl_notify(). A GFP_KERNEL flag would be fine in most circumstances, +but there are a few paths calling rtnl_net_notifyid() from atomic +context or from RCU critical sections. The latter also precludes the use +of gfp_any() as it wouldn't detect the RCU case. Also, the nlmsg_new() +call is wrong too, as it uses GFP_KERNEL unconditionally. + +Therefore, we need to pass the GFP flags as a parameter and propagate them +through function calls until the proper flags can be determined. + +In most cases, GFP_KERNEL is fine. The exceptions are: + * openvswitch: ovs_vport_cmd_get() and ovs_vport_cmd_dump() + indirectly call rtnl_net_notifyid() from RCU critical section, + + * rtnetlink: rtmsg_ifinfo_build_skb() already receives GFP flags as + parameter. + +Also, in ovs_vport_cmd_build_info(), let's change the GFP flags used +by nlmsg_new(). The function is allowed to sleep, so better make the +flags consistent with the ones used in the following +ovs_vport_cmd_fill_info() call. + +Found by code inspection. + +Fixes: 9a9634545c70 ("netns: notify netns id events") +Signed-off-by: Guillaume Nault +Acked-by: Nicolas Dichtel +Acked-by: Pravin B Shelar +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/net_namespace.h | 2 +- + net/core/dev.c | 2 +- + net/core/net_namespace.c | 17 +++++++++-------- + net/core/rtnetlink.c | 14 +++++++------- + net/openvswitch/datapath.c | 20 +++++++++++--------- + 5 files changed, 29 insertions(+), 26 deletions(-) + +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -333,7 +333,7 @@ static inline struct net *read_pnet(cons + #define __net_initconst __initconst + #endif + +-int peernet2id_alloc(struct net *net, struct net *peer); ++int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp); + int peernet2id(struct net *net, struct net *peer); + bool peernet_has_id(struct net *net, struct net *peer); + struct net *get_net_ns_by_id(struct net *net, int id); +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -9411,7 +9411,7 @@ int dev_change_net_namespace(struct net_ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + rcu_barrier(); + +- new_nsid = peernet2id_alloc(dev_net(dev), net); ++ new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); + /* If there is an ifindex conflict assign a new one */ + if (__dev_get_by_index(net, dev->ifindex)) + new_ifindex = dev_new_index(net); +--- a/net/core/net_namespace.c ++++ b/net/core/net_namespace.c +@@ -245,11 +245,11 @@ static int __peernet2id(struct net *net, + return __peernet2id_alloc(net, peer, &no); + } + +-static void rtnl_net_notifyid(struct net *net, int cmd, int id); ++static void rtnl_net_notifyid(struct net *net, int cmd, int id, gfp_t gfp); + /* This function returns the id of a peer netns. If no id is assigned, one will + * be allocated and returned. 
+ */ +-int peernet2id_alloc(struct net *net, struct net *peer) ++int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) + { + bool alloc = false, alive = false; + int id; +@@ -268,7 +268,7 @@ int peernet2id_alloc(struct net *net, st + id = __peernet2id_alloc(net, peer, &alloc); + spin_unlock_bh(&net->nsid_lock); + if (alloc && id >= 0) +- rtnl_net_notifyid(net, RTM_NEWNSID, id); ++ rtnl_net_notifyid(net, RTM_NEWNSID, id, gfp); + if (alive) + put_net(peer); + return id; +@@ -532,7 +532,8 @@ static void unhash_nsid(struct net *net, + idr_remove(&tmp->netns_ids, id); + spin_unlock_bh(&tmp->nsid_lock); + if (id >= 0) +- rtnl_net_notifyid(tmp, RTM_DELNSID, id); ++ rtnl_net_notifyid(tmp, RTM_DELNSID, id, ++ GFP_KERNEL); + if (tmp == last) + break; + } +@@ -764,7 +765,7 @@ static int rtnl_net_newid(struct sk_buff + err = alloc_netid(net, peer, nsid); + spin_unlock_bh(&net->nsid_lock); + if (err >= 0) { +- rtnl_net_notifyid(net, RTM_NEWNSID, err); ++ rtnl_net_notifyid(net, RTM_NEWNSID, err, GFP_KERNEL); + err = 0; + } else if (err == -ENOSPC && nsid >= 0) { + err = -EEXIST; +@@ -1051,7 +1052,7 @@ end: + return err < 0 ? 
err : skb->len; + } + +-static void rtnl_net_notifyid(struct net *net, int cmd, int id) ++static void rtnl_net_notifyid(struct net *net, int cmd, int id, gfp_t gfp) + { + struct net_fill_args fillargs = { + .cmd = cmd, +@@ -1060,7 +1061,7 @@ static void rtnl_net_notifyid(struct net + struct sk_buff *msg; + int err = -ENOMEM; + +- msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); ++ msg = nlmsg_new(rtnl_net_get_size(), gfp); + if (!msg) + goto out; + +@@ -1068,7 +1069,7 @@ static void rtnl_net_notifyid(struct net + if (err < 0) + goto err_out; + +- rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0); ++ rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, gfp); + return; + + err_out: +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1523,7 +1523,7 @@ static noinline_for_stack int nla_put_if + + static int rtnl_fill_link_netnsid(struct sk_buff *skb, + const struct net_device *dev, +- struct net *src_net) ++ struct net *src_net, gfp_t gfp) + { + bool put_iflink = false; + +@@ -1531,7 +1531,7 @@ static int rtnl_fill_link_netnsid(struct + struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); + + if (!net_eq(dev_net(dev), link_net)) { +- int id = peernet2id_alloc(src_net, link_net); ++ int id = peernet2id_alloc(src_net, link_net, gfp); + + if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) + return -EMSGSIZE; +@@ -1589,7 +1589,7 @@ static int rtnl_fill_ifinfo(struct sk_bu + int type, u32 pid, u32 seq, u32 change, + unsigned int flags, u32 ext_filter_mask, + u32 event, int *new_nsid, int new_ifindex, +- int tgt_netnsid) ++ int tgt_netnsid, gfp_t gfp) + { + struct ifinfomsg *ifm; + struct nlmsghdr *nlh; +@@ -1681,7 +1681,7 @@ static int rtnl_fill_ifinfo(struct sk_bu + goto nla_put_failure; + } + +- if (rtnl_fill_link_netnsid(skb, dev, src_net)) ++ if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp)) + goto nla_put_failure; + + if (new_nsid && +@@ -2001,7 +2001,7 @@ walk_entries: + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, 0, flags, + ext_filter_mask, 0, NULL, 0, +- 
netnsid); ++ netnsid, GFP_KERNEL); + + if (err < 0) { + if (likely(skb->len)) +@@ -3359,7 +3359,7 @@ static int rtnl_getlink(struct sk_buff * + err = rtnl_fill_ifinfo(nskb, dev, net, + RTM_NEWLINK, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, 0, 0, ext_filter_mask, +- 0, NULL, 0, netnsid); ++ 0, NULL, 0, netnsid, GFP_KERNEL); + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_size */ + WARN_ON(err == -EMSGSIZE); +@@ -3471,7 +3471,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(i + + err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), + type, 0, 0, change, 0, 0, event, +- new_nsid, new_ifindex, -1); ++ new_nsid, new_ifindex, -1, flags); + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); +--- a/net/openvswitch/datapath.c ++++ b/net/openvswitch/datapath.c +@@ -1850,7 +1850,7 @@ static struct genl_family dp_datapath_ge + /* Called with ovs_mutex or RCU read lock. */ + static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, + struct net *net, u32 portid, u32 seq, +- u32 flags, u8 cmd) ++ u32 flags, u8 cmd, gfp_t gfp) + { + struct ovs_header *ovs_header; + struct ovs_vport_stats vport_stats; +@@ -1871,7 +1871,7 @@ static int ovs_vport_cmd_fill_info(struc + goto nla_put_failure; + + if (!net_eq(net, dev_net(vport->dev))) { +- int id = peernet2id_alloc(net, dev_net(vport->dev)); ++ int id = peernet2id_alloc(net, dev_net(vport->dev), gfp); + + if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) + goto nla_put_failure; +@@ -1912,11 +1912,12 @@ struct sk_buff *ovs_vport_cmd_build_info + struct sk_buff *skb; + int retval; + +- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); ++ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOMEM); + +- retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd); ++ retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd, ++ GFP_KERNEL); + BUG_ON(retval < 0); + + return skb; +@@ -2058,7 +2059,7 @@ restart: + + err = 
ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, +- OVS_VPORT_CMD_NEW); ++ OVS_VPORT_CMD_NEW, GFP_KERNEL); + + new_headroom = netdev_get_fwd_headroom(vport->dev); + +@@ -2119,7 +2120,7 @@ static int ovs_vport_cmd_set(struct sk_b + + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, +- OVS_VPORT_CMD_SET); ++ OVS_VPORT_CMD_SET, GFP_KERNEL); + BUG_ON(err < 0); + + ovs_unlock(); +@@ -2159,7 +2160,7 @@ static int ovs_vport_cmd_del(struct sk_b + + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, +- OVS_VPORT_CMD_DEL); ++ OVS_VPORT_CMD_DEL, GFP_KERNEL); + BUG_ON(err < 0); + + /* the vport deletion may trigger dp headroom update */ +@@ -2206,7 +2207,7 @@ static int ovs_vport_cmd_get(struct sk_b + goto exit_unlock_free; + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, +- OVS_VPORT_CMD_GET); ++ OVS_VPORT_CMD_GET, GFP_ATOMIC); + BUG_ON(err < 0); + rcu_read_unlock(); + +@@ -2242,7 +2243,8 @@ static int ovs_vport_cmd_dump(struct sk_ + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, +- OVS_VPORT_CMD_GET) < 0) ++ OVS_VPORT_CMD_GET, ++ GFP_ATOMIC) < 0) + goto out; + + j++; diff --git a/queue-5.3/r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch b/queue-5.3/r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch new file mode 100644 index 00000000000..96f9529d991 --- /dev/null +++ b/queue-5.3/r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch @@ -0,0 +1,48 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Kazutoshi Noguchi +Date: Mon, 21 Oct 2019 00:03:07 +0900 +Subject: r8152: add device id for Lenovo ThinkPad USB-C Dock Gen 2 + +From: Kazutoshi Noguchi + +[ Upstream commit b3060531979422d5bb18d80226f978910284dc70 ] + +This device is sold as 'ThinkPad USB-C Dock Gen 2 (40AS)'. +Chipset is RTL8153 and works with r8152. 
+Without this, the generic cdc_ether grabs the device, and the device jams +connected networks up when the machine suspends. + +Signed-off-by: Kazutoshi Noguchi +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/cdc_ether.c | 7 +++++++ + drivers/net/usb/r8152.c | 1 + + 2 files changed, 8 insertions(+) + +--- a/drivers/net/usb/cdc_ether.c ++++ b/drivers/net/usb/cdc_ether.c +@@ -787,6 +787,13 @@ static const struct usb_device_id produc + .driver_info = 0, + }, + ++/* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */ ++{ ++ USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM, ++ USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), ++ .driver_info = 0, ++}, ++ + /* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ + { + USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM, +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -5402,6 +5402,7 @@ static const struct usb_device_id rtl815 + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, ++ {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, + {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, + {REALTEK_USB_DEVICE(VENDOR_ID_TPLINK, 0x0601)}, diff --git a/queue-5.3/r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch b/queue-5.3/r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch new file mode 100644 index 00000000000..1e0b2f4a3cf --- /dev/null +++ b/queue-5.3/r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch @@ -0,0 +1,39 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Heiner Kallweit +Date: Fri, 1 Nov 2019 00:10:21 +0100 +Subject: r8169: fix wrong PHY ID issue with RTL8168dp + +From: Heiner Kallweit + +[ Upstream commit 62bdc8fd1c21d4263ebd18bec57f82532d09249f ] + +As reported in [0] at least one RTL8168dp version has problems +establishing a link.
This chip version has an integrated RTL8211b PHY, +however the chip seems to report a wrong PHY ID, resulting in a wrong +PHY driver (for Generic Realtek PHY) being loaded. +Work around this issue by adding a hook to r8168dp_2_mdio_read() +for returning the correct PHY ID. + +[0] https://bbs.archlinux.org/viewtopic.php?id=246508 + +Fixes: 242cd9b5866a ("r8169: use phy_resume/phy_suspend") +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169_main.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -976,6 +976,10 @@ static int r8168dp_2_mdio_read(struct rt + { + int value; + ++ /* Work around issue with chip reporting wrong PHY ID */ ++ if (reg == MII_PHYSID2) ++ return 0xc912; ++ + r8168dp_2_mdio_start(tp); + + value = r8169_mdio_read(tp, reg); diff --git a/queue-5.3/rxrpc-fix-handling-of-last-subpacket-of-jumbo-packet.patch b/queue-5.3/rxrpc-fix-handling-of-last-subpacket-of-jumbo-packet.patch new file mode 100644 index 00000000000..9aa74428914 --- /dev/null +++ b/queue-5.3/rxrpc-fix-handling-of-last-subpacket-of-jumbo-packet.patch @@ -0,0 +1,133 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: David Howells +Date: Thu, 31 Oct 2019 12:13:46 +0000 +Subject: rxrpc: Fix handling of last subpacket of jumbo packet + +From: David Howells + +[ Upstream commit f9c32435ab7221d1d6cb35738fa85a2da012b23e ] + +When rxrpc_recvmsg_data() sets the return value to 1 because it's drained +all the data for the last packet, it checks the last-packet flag on the +whole packet - but this is wrong, since the last-packet flag is only set on +the final subpacket of the last jumbo packet. This means that a call that +receives its last packet in a jumbo packet won't complete properly. 
+ +Fix this by having rxrpc_locate_data() determine the last-packet state of +the subpacket it's looking at and passing that back to the caller rather +than having the caller look in the packet header. The caller then needs to +cache this in the rxrpc_call struct as rxrpc_locate_data() isn't then +called again for this packet. + +Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") +Fixes: e2de6c404898 ("rxrpc: Use info in skbuff instead of reparsing a jumbo packet") +Signed-off-by: David Howells +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/ar-internal.h | 1 + + net/rxrpc/recvmsg.c | 18 +++++++++++++----- + 2 files changed, 14 insertions(+), 5 deletions(-) + +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -596,6 +596,7 @@ struct rxrpc_call { + int debug_id; /* debug ID for printks */ + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ ++ bool rx_pkt_last; /* Current recvmsg packet is last */ + + /* Rx/Tx circular buffer, depending on phase. 
+ * +--- a/net/rxrpc/recvmsg.c ++++ b/net/rxrpc/recvmsg.c +@@ -267,11 +267,13 @@ static int rxrpc_verify_packet(struct rx + */ + static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + u8 *_annotation, +- unsigned int *_offset, unsigned int *_len) ++ unsigned int *_offset, unsigned int *_len, ++ bool *_last) + { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int len; ++ bool last = false; + int ret; + u8 annotation = *_annotation; + u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET; +@@ -281,6 +283,8 @@ static int rxrpc_locate_data(struct rxrp + len = skb->len - offset; + if (subpacket < sp->nr_subpackets - 1) + len = RXRPC_JUMBO_DATALEN; ++ else if (sp->rx_flags & RXRPC_SKB_INCL_LAST) ++ last = true; + + if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { + ret = rxrpc_verify_packet(call, skb, annotation, offset, len); +@@ -291,6 +295,7 @@ static int rxrpc_locate_data(struct rxrp + + *_offset = offset; + *_len = len; ++ *_last = last; + call->conn->security->locate_data(call, skb, _offset, _len); + return 0; + } +@@ -309,7 +314,7 @@ static int rxrpc_recvmsg_data(struct soc + rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top, seq; + size_t remain; +- bool last; ++ bool rx_pkt_last; + unsigned int rx_pkt_offset, rx_pkt_len; + int ix, copy, ret = -EAGAIN, ret2; + +@@ -319,6 +324,7 @@ static int rxrpc_recvmsg_data(struct soc + + rx_pkt_offset = call->rx_pkt_offset; + rx_pkt_len = call->rx_pkt_len; ++ rx_pkt_last = call->rx_pkt_last; + + if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { + seq = call->rx_hard_ack; +@@ -329,6 +335,7 @@ static int rxrpc_recvmsg_data(struct soc + /* Barriers against rxrpc_input_data(). 
*/ + hard_ack = call->rx_hard_ack; + seq = hard_ack + 1; ++ + while (top = smp_load_acquire(&call->rx_top), + before_eq(seq, top) + ) { +@@ -356,7 +363,8 @@ static int rxrpc_recvmsg_data(struct soc + if (rx_pkt_offset == 0) { + ret2 = rxrpc_locate_data(call, skb, + &call->rxtx_annotations[ix], +- &rx_pkt_offset, &rx_pkt_len); ++ &rx_pkt_offset, &rx_pkt_len, ++ &rx_pkt_last); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq, + rx_pkt_offset, rx_pkt_len, ret2); + if (ret2 < 0) { +@@ -396,13 +404,12 @@ static int rxrpc_recvmsg_data(struct soc + } + + /* The whole packet has been transferred. */ +- last = sp->hdr.flags & RXRPC_LAST_PACKET; + if (!(flags & MSG_PEEK)) + rxrpc_rotate_rx_window(call); + rx_pkt_offset = 0; + rx_pkt_len = 0; + +- if (last) { ++ if (rx_pkt_last) { + ASSERTCMP(seq, ==, READ_ONCE(call->rx_top)); + ret = 1; + goto out; +@@ -415,6 +422,7 @@ out: + if (!(flags & MSG_PEEK)) { + call->rx_pkt_offset = rx_pkt_offset; + call->rx_pkt_len = rx_pkt_len; ++ call->rx_pkt_last = rx_pkt_last; + } + done: + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, diff --git a/queue-5.3/selftests-fib_tests-add-more-tests-for-metric-update.patch b/queue-5.3/selftests-fib_tests-add-more-tests-for-metric-update.patch new file mode 100644 index 00000000000..3af2508ec67 --- /dev/null +++ b/queue-5.3/selftests-fib_tests-add-more-tests-for-metric-update.patch @@ -0,0 +1,51 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Paolo Abeni +Date: Sat, 26 Oct 2019 11:53:40 +0200 +Subject: selftests: fib_tests: add more tests for metric update + +From: Paolo Abeni + +[ Upstream commit 37de3b354150450ba12275397155e68113e99901 ] + +This patch adds two more tests to ipv4_addr_metric_test() to +explicitly cover the scenarios fixed by the previous patch. + +Suggested-by: David Ahern +Signed-off-by: Paolo Abeni +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/fib_tests.sh | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +--- a/tools/testing/selftests/net/fib_tests.sh ++++ b/tools/testing/selftests/net/fib_tests.sh +@@ -1438,6 +1438,27 @@ ipv4_addr_metric_test() + fi + log_test $rc 0 "Prefix route with metric on link up" + ++ # explicitly check for metric changes on edge scenarios ++ run_cmd "$IP addr flush dev dummy2" ++ run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259" ++ run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260" ++ rc=$? ++ if [ $rc -eq 0 ]; then ++ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260" ++ rc=$? ++ fi ++ log_test $rc 0 "Modify metric of .0/24 address" ++ ++ run_cmd "$IP addr flush dev dummy2" ++ run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260" ++ run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261" ++ rc=$? ++ if [ $rc -eq 0 ]; then ++ check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261" ++ rc=$? ++ fi ++ log_test $rc 0 "Modify metric of address with peer route" ++ + $IP li del dummy1 + $IP li del dummy2 + cleanup diff --git a/queue-5.3/selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch b/queue-5.3/selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch new file mode 100644 index 00000000000..eb5536bde3e --- /dev/null +++ b/queue-5.3/selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch @@ -0,0 +1,44 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Wei Wang +Date: Thu, 31 Oct 2019 16:24:36 -0700 +Subject: selftests: net: reuseport_dualstack: fix uninitalized parameter + +From: Wei Wang + +[ Upstream commit d64479a3e3f9924074ca7b50bd72fa5211dca9c1 ] + +This test reports EINVAL for getsockopt(SOL_SOCKET, SO_DOMAIN) +occasionally due to the uninitialized length parameter. 
+Initialize it to fix this, and also use int for "test_family" to comply +with the API standard. + +Fixes: d6a61f80b871 ("soreuseport: test mixed v4/v6 sockets") +Reported-by: Maciej Żenczykowski +Signed-off-by: Eric Dumazet +Signed-off-by: Wei Wang +Cc: Craig Gallek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/reuseport_dualstack.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/net/reuseport_dualstack.c ++++ b/tools/testing/selftests/net/reuseport_dualstack.c +@@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count + { + struct epoll_event ev; + int epfd, i, test_fd; +- uint16_t test_family; ++ int test_family; + socklen_t len; + + epfd = epoll_create(1); +@@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count + send_from_v4(proto); + + test_fd = receive_once(epfd, proto); ++ len = sizeof(test_family); + if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len)) + error(1, errno, "failed to read socket domain"); + if (test_family != AF_INET) diff --git a/queue-5.3/series b/queue-5.3/series index 7553a43e2a4..6a11980a1bb 100644 --- a/queue-5.3/series +++ b/queue-5.3/series @@ -77,3 +77,58 @@ irqchip-sifive-plic-skip-contexts-except-supervisor-.patch nbd-protect-cmd-status-with-cmd-lock.patch nbd-handle-racing-with-error-ed-out-commands.patch ata-libahci_platform-fix-regulator_get_optional-misu.patch +cxgb4-fix-panic-when-attaching-to-uld-fail.patch +cxgb4-request-the-tx-cidx-updates-to-status-page.patch +dccp-do-not-leak-jiffies-on-the-wire.patch +erspan-fix-the-tun_info-options_len-check-for-erspan.patch +inet-stop-leaking-jiffies-on-the-wire.patch +net-annotate-accesses-to-sk-sk_incoming_cpu.patch +net-annotate-lockless-accesses-to-sk-sk_napi_id.patch +net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch +net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch +net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
+net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch +net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch +netns-fix-gfp-flags-in-rtnl_net_notifyid.patch +net-rtnetlink-fix-a-typo-fbd-fdb.patch +net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch +net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch +selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch +udp-fix-data-race-in-udp_set_dev_scratch.patch +vxlan-check-tun_info-options_len-properly.patch +net-add-skb_queue_empty_lockless.patch +udp-use-skb_queue_empty_lockless.patch +net-use-skb_queue_empty_lockless-in-poll-handlers.patch +net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch +net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch +ipv4-fix-route-update-on-metric-change.patch +selftests-fib_tests-add-more-tests-for-metric-update.patch +net-smc-fix-closing-of-fallback-smc-sockets.patch +net-smc-keep-vlan_id-for-smc-r-in-smc_listen_work.patch +keys-fix-memory-leak-in-copy_net_ns.patch +net-phylink-fix-phylink_dbg-macro.patch +rxrpc-fix-handling-of-last-subpacket-of-jumbo-packet.patch +net-mlx5e-determine-source-port-properly-for-vlan-push-action.patch +net-mlx5e-remove-incorrect-match-criteria-assignment-line.patch +net-mlx5e-initialize-on-stack-link-modes-bitmap.patch +net-mlx5-fix-flow-counter-list-auto-bits-struct.patch +net-smc-fix-refcounting-for-non-blocking-connect.patch +net-mlx5-fix-rtable-reference-leak.patch +mlxsw-core-unpublish-devlink-parameters-during-reload.patch +r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch +net-mlx5e-fix-ethtool-self-test-link-speed.patch +net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch +ipv4-fix-ipskb_frag_pmtu-handling-with-fragmentation.patch +net-bcmgenet-don-t-set-phydev-link-from-mac.patch +net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch +net-dsa-fix-switch-tree-list.patch 
+net-ensure-correct-skb-tstamp-in-various-fragmenters.patch +net-hns3-fix-mis-counting-irq-vector-numbers-issue.patch +net-netem-fix-error-path-for-corrupted-gso-frames.patch +net-reorder-struct-net-fields-to-avoid-false-sharing.patch +net-usb-lan78xx-connect-phy-before-registering-mac.patch +r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch +net-netem-correct-the-parent-s-backlog-when-corrupted-packet-was-dropped.patch +net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch +net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch +net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch diff --git a/queue-5.3/udp-fix-data-race-in-udp_set_dev_scratch.patch b/queue-5.3/udp-fix-data-race-in-udp_set_dev_scratch.patch new file mode 100644 index 00000000000..f2967c0caad --- /dev/null +++ b/queue-5.3/udp-fix-data-race-in-udp_set_dev_scratch.patch @@ -0,0 +1,102 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Thu, 24 Oct 2019 11:43:31 -0700 +Subject: udp: fix data-race in udp_set_dev_scratch() + +From: Eric Dumazet + +[ Upstream commit a793183caa9afae907a0d7ddd2ffd57329369bf5 ] + +KCSAN reported a data-race in udp_set_dev_scratch() [1] + +The issue here is that we must not write over skb fields +if skb is shared. A similar issue has been fixed in commit +89c22d8c3b27 ("net: Fix skb csum races when peeking") + +While we are at it, use a helper only dealing with +udp_skb_scratch(skb)->csum_unnecessary, as this allows +udp_set_dev_scratch() to be called once and thus inlined. 
+ +[1] +BUG: KCSAN: data-race in udp_set_dev_scratch / udpv6_recvmsg + +write to 0xffff888120278317 of 1 bytes by task 10411 on cpu 1: + udp_set_dev_scratch+0xea/0x200 net/ipv4/udp.c:1308 + __first_packet_length+0x147/0x420 net/ipv4/udp.c:1556 + first_packet_length+0x68/0x2a0 net/ipv4/udp.c:1579 + udp_poll+0xea/0x110 net/ipv4/udp.c:2720 + sock_poll+0xed/0x250 net/socket.c:1256 + vfs_poll include/linux/poll.h:90 [inline] + do_select+0x7d0/0x1020 fs/select.c:534 + core_sys_select+0x381/0x550 fs/select.c:677 + do_pselect.constprop.0+0x11d/0x160 fs/select.c:759 + __do_sys_pselect6 fs/select.c:784 [inline] + __se_sys_pselect6 fs/select.c:769 [inline] + __x64_sys_pselect6+0x12e/0x170 fs/select.c:769 + do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +read to 0xffff888120278317 of 1 bytes by task 10413 on cpu 0: + udp_skb_csum_unnecessary include/net/udp.h:358 [inline] + udpv6_recvmsg+0x43e/0xe90 net/ipv6/udp.c:310 + inet6_recvmsg+0xbb/0x240 net/ipv6/af_inet6.c:592 + sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871 + ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 + do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 + __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 + __do_sys_recvmmsg net/socket.c:2703 [inline] + __se_sys_recvmmsg net/socket.c:2696 [inline] + __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 + do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 0 PID: 10413 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + +Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Paolo Abeni +Reviewed-by: Paolo Abeni +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1316,6 +1316,20 @@ static void udp_set_dev_scratch(struct s + scratch->_tsize_state |= UDP_SKB_IS_STATELESS; + } + ++static void udp_skb_csum_unnecessary_set(struct sk_buff *skb) ++{ ++ /* We come here after udp_lib_checksum_complete() returned 0. ++ * This means that __skb_checksum_complete() might have ++ * set skb->csum_valid to 1. ++ * On 64bit platforms, we can set csum_unnecessary ++ * to true, but only if the skb is not shared. ++ */ ++#if BITS_PER_LONG == 64 ++ if (!skb_shared(skb)) ++ udp_skb_scratch(skb)->csum_unnecessary = true; ++#endif ++} ++ + static int udp_skb_truesize(struct sk_buff *skb) + { + return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; +@@ -1550,10 +1564,7 @@ static struct sk_buff *__first_packet_le + *total += skb->truesize; + kfree_skb(skb); + } else { +- /* the csum related bits could be changed, refresh +- * the scratch area +- */ +- udp_set_dev_scratch(skb); ++ udp_skb_csum_unnecessary_set(skb); + break; + } + } diff --git a/queue-5.3/udp-use-skb_queue_empty_lockless.patch b/queue-5.3/udp-use-skb_queue_empty_lockless.patch new file mode 100644 index 00000000000..cd29530dfdb --- /dev/null +++ b/queue-5.3/udp-use-skb_queue_empty_lockless.patch @@ -0,0 +1,98 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Eric Dumazet +Date: Wed, 23 Oct 2019 22:44:49 -0700 +Subject: udp: use skb_queue_empty_lockless() + +From: Eric Dumazet + +[ Upstream commit 137a0dbe3426fd7bcfe3f8117b36a87b3590e4eb ] + +syzbot reported a data-race [1]. + +We should use skb_queue_empty_lockless() to document that we are +not ensuring a mutual exclusion and silence KCSAN. 
+ +[1] +BUG: KCSAN: data-race in __skb_recv_udp / __udp_enqueue_schedule_skb + +write to 0xffff888122474b50 of 8 bytes by interrupt on cpu 0: + __skb_insert include/linux/skbuff.h:1852 [inline] + __skb_queue_before include/linux/skbuff.h:1958 [inline] + __skb_queue_tail include/linux/skbuff.h:1991 [inline] + __udp_enqueue_schedule_skb+0x2c1/0x410 net/ipv4/udp.c:1470 + __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline] + udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057 + udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074 + udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233 + __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300 + udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470 + ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 + ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 + dst_input include/net/dst.h:442 [inline] + ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 + NF_HOOK include/linux/netfilter.h:305 [inline] + NF_HOOK include/linux/netfilter.h:299 [inline] + ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 + __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 + __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 + process_backlog+0x1d3/0x420 net/core/dev.c:5955 + +read to 0xffff888122474b50 of 8 bytes by task 8921 on cpu 1: + skb_queue_empty include/linux/skbuff.h:1494 [inline] + __skb_recv_udp+0x18d/0x500 net/ipv4/udp.c:1653 + udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712 + inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 + sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871 + ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 + do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 + __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 + __do_sys_recvmmsg net/socket.c:2703 [inline] + __se_sys_recvmmsg net/socket.c:2696 [inline] + __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 + do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 8921 Comm: syz-executor.4 Not tainted 5.4.0-rc3+ #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1588,7 +1588,7 @@ static int first_packet_length(struct so + + spin_lock_bh(&rcvq->lock); + skb = __first_packet_length(sk, rcvq, &total); +- if (!skb && !skb_queue_empty(sk_queue)) { ++ if (!skb && !skb_queue_empty_lockless(sk_queue)) { + spin_lock(&sk_queue->lock); + skb_queue_splice_tail_init(sk_queue, rcvq); + spin_unlock(&sk_queue->lock); +@@ -1661,7 +1661,7 @@ struct sk_buff *__skb_recv_udp(struct so + return skb; + } + +- if (skb_queue_empty(sk_queue)) { ++ if (skb_queue_empty_lockless(sk_queue)) { + spin_unlock_bh(&queue->lock); + goto busy_check; + } +@@ -1687,7 +1687,7 @@ busy_check: + break; + + sk_busy_loop(sk, flags & MSG_DONTWAIT); +- } while (!skb_queue_empty(sk_queue)); ++ } while (!skb_queue_empty_lockless(sk_queue)); + + /* sk_queue is empty, reader_queue may contain peeked packets */ + } while (timeo && diff --git a/queue-5.3/vxlan-check-tun_info-options_len-properly.patch b/queue-5.3/vxlan-check-tun_info-options_len-properly.patch new file mode 100644 index 00000000000..0e28d554863 --- /dev/null +++ b/queue-5.3/vxlan-check-tun_info-options_len-properly.patch @@ -0,0 +1,38 @@ +From foo@baz Wed 06 Nov 2019 03:23:18 PM CET +From: Xin Long +Date: Tue, 29 Oct 2019 01:24:32 +0800 +Subject: vxlan: check tun_info options_len properly + +From: Xin Long + +[ Upstream commit eadf52cf1852196a1363044dcda22fa5d7f296f7 ] + +This patch is to improve the tun_info options_len by dropping +the skb when TUNNEL_VXLAN_OPT is set but options_len is less +than vxlan_metadata. 
This can avoid a potential out-of-bounds +access on ip_tun_info. + +Fixes: ee122c79d422 ("vxlan: Flow based tunneling") +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2487,9 +2487,11 @@ static void vxlan_xmit_one(struct sk_buf + vni = tunnel_id_to_key32(info->key.tun_id); + ifindex = 0; + dst_cache = &info->dst_cache; +- if (info->options_len && +- info->key.tun_flags & TUNNEL_VXLAN_OPT) ++ if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { ++ if (info->options_len < sizeof(*md)) ++ goto drop; + md = ip_tunnel_info_opts(info); ++ } + ttl = info->key.ttl; + tos = info->key.tos; + label = info->key.label; -- 2.47.2