From 6b6035bfea870be3f27e9c5396d5935a42021320 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 10 Apr 2019 11:30:32 -0400 Subject: [PATCH] net patches from davem for 5.0 Signed-off-by: Sasha Levin --- ...ove-rx-consumer-index-validity-check.patch | 59 +++++++ ..._en-reset-device-on-rx-buffer-errors.patch | 44 +++++ ...fix-unwanted-wakeup-after-tx_disable.patch | 164 ++++++++++++++++++ ...-completion-structure-initialization.patch | 92 ++++++++++ ...match-to-arphrd_tunnel6-for-dev-type.patch | 53 ++++++ ...-dangling-pointer-when-ipv6-fragment.patch | 62 +++++++ ...reset-ip-header-pointer-in-ipip6_rcv.patch | 119 +++++++++++++ ...-of-device-registration-to-fix-a-cra.patch | 94 ++++++++++ ...s-clear-mcast-matching-struct-on-rep.patch | 78 +++++++++ ...eceive_skb_list-unlist-skb-before-pa.patch | 95 ++++++++++ ...call-vzalloc-for-zero-sized-memory-r.patch | 104 +++++++++++ ...ro-flush-when-receiving-a-gso-packet.patch | 42 +++++ ...possible-use-after-free-in-ip6erspan.patch | 88 ++++++++++ ...ossible-use-after-free-in-erspan_rcv.patch | 73 ++++++++ ...a-missing-check-on-idr_find-free-buf.patch | 63 +++++++ ...-mlx5-decrease-default-mr-cache-size.patch | 60 +++++++ .../net-mlx5e-add-a-lock-on-tir-list.patch | 85 +++++++++ ...-error-handling-when-refreshing-tirs.patch | 48 +++++ queue-5.0/net-mlx5e-update-xoff-formula.patch | 49 ++++++ queue-5.0/net-mlx5e-update-xon-formula.patch | 147 ++++++++++++++++ ...-destroy-connection-if-t_sock-is-nul.patch | 148 ++++++++++++++++ ...mple-fix-divide-by-zero-in-the-traff.patch | 104 +++++++++++ ...d-fix-get-helper-of-the-matchall-cls.patch | 57 ++++++ ...-null-pointer-dereference-in-nicvf_o.patch | 136 +++++++++++++++ ...ping-failed-when-vrf-mtu-is-set-to-0.patch | 63 +++++++ ...rovide-pure-entropy-for-net_hash_mix.patch | 81 +++++++++ .../nfp-disable-netpoll-on-representors.patch | 46 +++++ ...-the-return-code-from-dev_queue_xmit.patch | 37 ++++ ...switch-fix-flow-actions-reallocation.patch | 68 ++++++++ queue-5.0/qmi_wwan-add-olicard-600.patch | 65 +++++++ queue-5.0/r8169-disable-aspm-again.patch | 53 ++++++ ...fault-rx-interrupt-coalescing-on-rtl.patch | 41 +++++ ..._pad-of-sockaddr_in-before-copying-t.patch | 58 +++++++ queue-5.0/series | 36 ++++ .../tcp-ensure-dctcp-reacts-to-losses.patch | 145 ++++++++++++++++ ...ial-null-pointer-dereference-in-tcp_.patch | 41 +++++ ..._source_route-on-the-original-netdev.patch | 98 +++++++++++ 37 files changed, 2896 insertions(+) create mode 100644 queue-5.0/bnxt_en-improve-rx-consumer-index-validity-check.patch create mode 100644 queue-5.0/bnxt_en-reset-device-on-rx-buffer-errors.patch create mode 100644 queue-5.0/hv_netvsc-fix-unwanted-wakeup-after-tx_disable.patch create mode 100644 queue-5.0/ibmvnic-fix-completion-structure-initialization.patch create mode 100644 queue-5.0/ip6_tunnel-match-to-arphrd_tunnel6-for-dev-type.patch create mode 100644 queue-5.0/ipv6-fix-dangling-pointer-when-ipv6-fragment.patch create mode 100644 queue-5.0/ipv6-sit-reset-ip-header-pointer-in-ipip6_rcv.patch create mode 100644 queue-5.0/kcm-switch-order-of-device-registration-to-fix-a-cra.patch create mode 100644 queue-5.0/net-bridge-always-clear-mcast-matching-struct-on-rep.patch create mode 100644 queue-5.0/net-core-netif_receive_skb_list-unlist-skb-before-pa.patch create mode 100644 queue-5.0/net-ethtool-not-call-vzalloc-for-zero-sized-memory-r.patch create mode 100644 queue-5.0/net-gro-fix-gro-flush-when-receiving-a-gso-packet.patch create mode 100644 queue-5.0/net-ip6_gre-fix-possible-use-after-free-in-ip6erspan.patch create 
mode 100644 queue-5.0/net-ip_gre-fix-possible-use-after-free-in-erspan_rcv.patch create mode 100644 queue-5.0/net-mlx5-add-a-missing-check-on-idr_find-free-buf.patch create mode 100644 queue-5.0/net-mlx5-decrease-default-mr-cache-size.patch create mode 100644 queue-5.0/net-mlx5e-add-a-lock-on-tir-list.patch create mode 100644 queue-5.0/net-mlx5e-fix-error-handling-when-refreshing-tirs.patch create mode 100644 queue-5.0/net-mlx5e-update-xoff-formula.patch create mode 100644 queue-5.0/net-mlx5e-update-xon-formula.patch create mode 100644 queue-5.0/net-rds-force-to-destroy-connection-if-t_sock-is-nul.patch create mode 100644 queue-5.0/net-sched-act_sample-fix-divide-by-zero-in-the-traff.patch create mode 100644 queue-5.0/net-sched-fix-get-helper-of-the-matchall-cls.patch create mode 100644 queue-5.0/net-thunderx-fix-null-pointer-dereference-in-nicvf_o.patch create mode 100644 queue-5.0/net-vrf-fix-ping-failed-when-vrf-mtu-is-set-to-0.patch create mode 100644 queue-5.0/netns-provide-pure-entropy-for-net_hash_mix.patch create mode 100644 queue-5.0/nfp-disable-netpoll-on-representors.patch create mode 100644 queue-5.0/nfp-validate-the-return-code-from-dev_queue_xmit.patch create mode 100644 queue-5.0/openvswitch-fix-flow-actions-reallocation.patch create mode 100644 queue-5.0/qmi_wwan-add-olicard-600.patch create mode 100644 queue-5.0/r8169-disable-aspm-again.patch create mode 100644 queue-5.0/r8169-disable-default-rx-interrupt-coalescing-on-rtl.patch create mode 100644 queue-5.0/sctp-initialize-_pad-of-sockaddr_in-before-copying-t.patch create mode 100644 queue-5.0/tcp-ensure-dctcp-reacts-to-losses.patch create mode 100644 queue-5.0/tcp-fix-a-potential-null-pointer-dereference-in-tcp_.patch create mode 100644 queue-5.0/vrf-check-accept_source_route-on-the-original-netdev.patch diff --git a/queue-5.0/bnxt_en-improve-rx-consumer-index-validity-check.patch b/queue-5.0/bnxt_en-improve-rx-consumer-index-validity-check.patch new file mode 100644 index 0000000000..a4839b5fd1 --- /dev/null +++ b/queue-5.0/bnxt_en-improve-rx-consumer-index-validity-check.patch @@ -0,0 +1,59 @@ +From 230a08b71465667df5cff3cb283a7babe256c8e0 Mon Sep 17 00:00:00 2001 +From: Michael Chan +Date: Mon, 8 Apr 2019 17:39:54 -0400 +Subject: bnxt_en: Improve RX consumer index validity check. + +[ Upstream commit a1b0e4e684e9c300b9e759b46cb7a0147e61ddff ] + +There is logic to check that the RX/TPA consumer index is the expected +index to work around a hardware problem. However, the potentially bad +consumer index is first used to index into an array to reference an entry. +This can potentially crash if the bad consumer index is beyond legal +range. Improve the logic to use the consumer index for dereferencing +after the validity check and log an error message. + +Fixes: fa7e28127a5a ("bnxt_en: Add workaround to detect bad opaque in rx completion (part 2)") +Signed-off-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 803f7990d32b..351417e74ae2 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -1129,6 +1129,8 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + tpa_info = &rxr->rx_tpa[agg_id]; + + if (unlikely(cons != rxr->rx_next_cons)) { ++ netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n", ++ cons, rxr->rx_next_cons); + bnxt_sched_reset(bp, rxr); + return; + } +@@ -1581,15 +1583,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + } + + cons = rxcmp->rx_cmp_opaque; +- rx_buf = &rxr->rx_buf_ring[cons]; +- data = rx_buf->data; +- data_ptr = rx_buf->data_ptr; + if (unlikely(cons != rxr->rx_next_cons)) { + int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp); + ++ netdev_warn(bp->dev, "RX cons %x != expected cons %x\n", ++ cons, rxr->rx_next_cons); + bnxt_sched_reset(bp, rxr); + return rc1; + } ++ rx_buf = &rxr->rx_buf_ring[cons]; ++ data = rx_buf->data; ++ data_ptr = rx_buf->data_ptr; + prefetch(data_ptr); + + misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1); +-- +2.19.1 + diff --git a/queue-5.0/bnxt_en-reset-device-on-rx-buffer-errors.patch b/queue-5.0/bnxt_en-reset-device-on-rx-buffer-errors.patch new file mode 100644 index 0000000000..c61c3edbe9 --- /dev/null +++ b/queue-5.0/bnxt_en-reset-device-on-rx-buffer-errors.patch @@ -0,0 +1,44 @@ +From af2a09446e793fe72da3a2689d0139bb7e62fe44 Mon Sep 17 00:00:00 2001 +From: Michael Chan +Date: Mon, 8 Apr 2019 17:39:55 -0400 +Subject: bnxt_en: Reset device on RX buffer errors. + +[ Upstream commit 8e44e96c6c8e8fb80b84a2ca11798a8554f710f2 ] + +If the RX completion indicates RX buffers errors, the RX ring will be +disabled by firmware and no packets will be received on that ring from +that point on. Recover by resetting the device. + +Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") +Signed-off-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 351417e74ae2..40ca339ec3df 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -1610,11 +1610,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + + rx_buf->data = NULL; + if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) { ++ u32 rx_err = le32_to_cpu(rxcmp1->rx_cmp_cfa_code_errors_v2); ++ + bnxt_reuse_rx_data(rxr, cons, data); + if (agg_bufs) + bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); + + rc = -EIO; ++ if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) { ++ netdev_warn(bp->dev, "RX buffer error %x\n", rx_err); ++ bnxt_sched_reset(bp, rxr); ++ } + goto next_rx; + } + +-- +2.19.1 + diff --git a/queue-5.0/hv_netvsc-fix-unwanted-wakeup-after-tx_disable.patch b/queue-5.0/hv_netvsc-fix-unwanted-wakeup-after-tx_disable.patch new file mode 100644 index 0000000000..646077c2c2 --- /dev/null +++ b/queue-5.0/hv_netvsc-fix-unwanted-wakeup-after-tx_disable.patch @@ -0,0 +1,164 @@ +From a4cedf217f200329fa8983535019c3b407de668a Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Thu, 28 Mar 2019 19:40:36 +0000 +Subject: hv_netvsc: Fix unwanted wakeup after tx_disable + +[ Upstream commit 1b704c4a1ba95574832e730f23817b651db2aa59 ] + +After queue stopped, the wakeup mechanism may wake it up again +when ring buffer usage is lower than a threshold. This may cause +send path panic on NULL pointer when we stopped all tx queues in +netvsc_detach and start removing the netvsc device. + +This patch fix it by adding a tx_disable flag to prevent unwanted +queue wakeup. + +Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") +Reported-by: Mohammed Gamal +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/hyperv/hyperv_net.h | 1 + + drivers/net/hyperv/netvsc.c | 6 ++++-- + drivers/net/hyperv/netvsc_drv.c | 32 ++++++++++++++++++++++++++------ + 3 files changed, 31 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index e859ae2e42d5..49f41b64077b 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -987,6 +987,7 @@ struct netvsc_device { + + wait_queue_head_t wait_drain; + bool destroy; ++ bool tx_disable; /* if true, do not wake up queue again */ + + /* Receive buffer allocated by us but manages by NetVSP */ + void *recv_buf; +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index 813d195bbd57..e0dce373cdd9 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -110,6 +110,7 @@ static struct netvsc_device *alloc_net_device(void) + + init_waitqueue_head(&net_device->wait_drain); + net_device->destroy = false; ++ net_device->tx_disable = false; + + net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; + net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; +@@ -719,7 +720,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev, + } else { + struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx); + +- if (netif_tx_queue_stopped(txq) && ++ if (netif_tx_queue_stopped(txq) && !net_device->tx_disable && + (hv_get_avail_to_write_percent(&channel->outbound) > + RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) { + netif_tx_wake_queue(txq); +@@ -874,7 +875,8 @@ static inline int netvsc_send_pkt( + } else if (ret == -EAGAIN) { + netif_tx_stop_queue(txq); + ndev_ctx->eth_stats.stop_queue++; +- if (atomic_read(&nvchan->queue_sends) < 1) { ++ if (atomic_read(&nvchan->queue_sends) < 1 && ++ !net_device->tx_disable) { + netif_tx_wake_queue(txq); + ndev_ctx->eth_stats.wake_queue++; + ret = -ENOSPC; +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index cf4897043e83..b20fb0fb595b 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -109,6 +109,15 @@ static void netvsc_set_rx_mode(struct net_device *net) + rcu_read_unlock(); + } + ++static void netvsc_tx_enable(struct netvsc_device *nvscdev, ++ struct net_device *ndev) ++{ ++ nvscdev->tx_disable = false; ++ virt_wmb(); /* ensure queue wake up mechanism is on */ ++ ++ netif_tx_wake_all_queues(ndev); ++} ++ + static int netvsc_open(struct net_device *net) + { + struct net_device_context *ndev_ctx = netdev_priv(net); +@@ -129,7 +138,7 @@ static int netvsc_open(struct net_device *net) + rdev = nvdev->extension; + if (!rdev->link_state) { + netif_carrier_on(net); +- netif_tx_wake_all_queues(net); ++ netvsc_tx_enable(nvdev, net); + } + + if (vf_netdev) { +@@ -184,6 +193,17 @@ static int netvsc_wait_until_empty(struct netvsc_device *nvdev) + } + } + ++static void netvsc_tx_disable(struct netvsc_device *nvscdev, ++ struct net_device *ndev) ++{ ++ if (nvscdev) { ++ nvscdev->tx_disable = true; ++ virt_wmb(); /* ensure txq will not wake up after stop */ ++ } ++ ++ netif_tx_disable(ndev); ++} ++ + static int netvsc_close(struct net_device *net) + { + struct net_device_context *net_device_ctx = netdev_priv(net); +@@ -192,7 +212,7 @@ static int netvsc_close(struct net_device *net) + struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); + int ret; + +- netif_tx_disable(net); ++ netvsc_tx_disable(nvdev, net); + + /* No need to close rndis filter if it is removed already */ + if (!nvdev) +@@ 
-920,7 +940,7 @@ static int netvsc_detach(struct net_device *ndev, + + /* If device was up (receiving) then shutdown */ + if (netif_running(ndev)) { +- netif_tx_disable(ndev); ++ netvsc_tx_disable(nvdev, ndev); + + ret = rndis_filter_close(nvdev); + if (ret) { +@@ -1908,7 +1928,7 @@ static void netvsc_link_change(struct work_struct *w) + if (rdev->link_state) { + rdev->link_state = false; + netif_carrier_on(net); +- netif_tx_wake_all_queues(net); ++ netvsc_tx_enable(net_device, net); + } else { + notify = true; + } +@@ -1918,7 +1938,7 @@ static void netvsc_link_change(struct work_struct *w) + if (!rdev->link_state) { + rdev->link_state = true; + netif_carrier_off(net); +- netif_tx_stop_all_queues(net); ++ netvsc_tx_disable(net_device, net); + } + kfree(event); + break; +@@ -1927,7 +1947,7 @@ static void netvsc_link_change(struct work_struct *w) + if (!rdev->link_state) { + rdev->link_state = true; + netif_carrier_off(net); +- netif_tx_stop_all_queues(net); ++ netvsc_tx_disable(net_device, net); + event->event = RNDIS_STATUS_MEDIA_CONNECT; + spin_lock_irqsave(&ndev_ctx->lock, flags); + list_add(&event->list, &ndev_ctx->reconfig_events); +-- +2.19.1 + diff --git a/queue-5.0/ibmvnic-fix-completion-structure-initialization.patch b/queue-5.0/ibmvnic-fix-completion-structure-initialization.patch new file mode 100644 index 0000000000..99203ac909 --- /dev/null +++ b/queue-5.0/ibmvnic-fix-completion-structure-initialization.patch @@ -0,0 +1,92 @@ +From 7fbd41451a1ca4a96cbbd26be172683d4959e6ac Mon Sep 17 00:00:00 2001 +From: Thomas Falcon +Date: Thu, 4 Apr 2019 18:58:26 -0500 +Subject: ibmvnic: Fix completion structure initialization + +[ Upstream commit bbd669a868bba591ffd38b7bc75a7b361bb54b04 ] + +Fix device initialization completion handling for vNIC adapters. +Initialize the completion structure on probe and reinitialize when needed. 
+This also fixes a race condition during kdump where the driver can attempt +to access the completion struct before it is initialized: + +Unable to handle kernel paging request for data at address 0x00000000 +Faulting instruction address: 0xc0000000081acbe0 +Oops: Kernel access of bad area, sig: 11 [#1] +LE SMP NR_CPUS=2048 NUMA pSeries +Modules linked in: ibmvnic(+) ibmveth sunrpc overlay squashfs loop +CPU: 19 PID: 301 Comm: systemd-udevd Not tainted 4.18.0-64.el8.ppc64le #1 +NIP: c0000000081acbe0 LR: c0000000081ad964 CTR: c0000000081ad900 +REGS: c000000027f3f990 TRAP: 0300 Not tainted (4.18.0-64.el8.ppc64le) +MSR: 800000010280b033 CR: 28228288 XER: 00000006 +CFAR: c000000008008934 DAR: 0000000000000000 DSISR: 40000000 IRQMASK: 1 +GPR00: c0000000081ad964 c000000027f3fc10 c0000000095b5800 c0000000221b4e58 +GPR04: 0000000000000003 0000000000000001 000049a086918581 00000000000000d4 +GPR08: 0000000000000007 0000000000000000 ffffffffffffffe8 d0000000014dde28 +GPR12: c0000000081ad900 c000000009a00c00 0000000000000001 0000000000000100 +GPR16: 0000000000000038 0000000000000007 c0000000095e2230 0000000000000006 +GPR20: 0000000000400140 0000000000000001 c00000000910c880 0000000000000000 +GPR24: 0000000000000000 0000000000000006 0000000000000000 0000000000000003 +GPR28: 0000000000000001 0000000000000001 c0000000221b4e60 c0000000221b4e58 +NIP [c0000000081acbe0] __wake_up_locked+0x50/0x100 +LR [c0000000081ad964] complete+0x64/0xa0 +Call Trace: +[c000000027f3fc10] [c000000027f3fc60] 0xc000000027f3fc60 (unreliable) +[c000000027f3fc60] [c0000000081ad964] complete+0x64/0xa0 +[c000000027f3fca0] [d0000000014dad58] ibmvnic_handle_crq+0xce0/0x1160 [ibmvnic] +[c000000027f3fd50] [d0000000014db270] ibmvnic_tasklet+0x98/0x130 [ibmvnic] +[c000000027f3fda0] [c00000000813f334] tasklet_action_common.isra.3+0xc4/0x1a0 +[c000000027f3fe00] [c000000008cd13f4] __do_softirq+0x164/0x400 +[c000000027f3fef0] [c00000000813ed64] irq_exit+0x184/0x1c0 +[c000000027f3ff20] [c0000000080188e8] __do_irq+0xb8/0x210 +[c000000027f3ff90] [c00000000802d0a4] call_do_irq+0x14/0x24 +[c000000026a5b010] [c000000008018adc] do_IRQ+0x9c/0x130 +[c000000026a5b060] [c000000008008ce4] hardware_interrupt_common+0x114/0x120 + +Signed-off-by: Thomas Falcon +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/ibm/ibmvnic.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c +index 5ecbb1adcf3b..51cfe95f3e24 100644 +--- a/drivers/net/ethernet/ibm/ibmvnic.c ++++ b/drivers/net/ethernet/ibm/ibmvnic.c +@@ -1885,6 +1885,7 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, + */ + adapter->state = VNIC_PROBED; + ++ reinit_completion(&adapter->init_done); + rc = init_crq_queue(adapter); + if (rc) { + netdev_err(adapter->netdev, +@@ -4625,7 +4626,7 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) + old_num_rx_queues = adapter->req_rx_queues; + old_num_tx_queues = adapter->req_tx_queues; + +- init_completion(&adapter->init_done); ++ reinit_completion(&adapter->init_done); + adapter->init_done_rc = 0; + ibmvnic_send_crq_init(adapter); + if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { +@@ -4680,7 +4681,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) + + adapter->from_passive_init = false; + +- init_completion(&adapter->init_done); + adapter->init_done_rc = 0; + ibmvnic_send_crq_init(adapter); + if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { +@@ -4759,6 +4759,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) + INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset); + INIT_LIST_HEAD(&adapter->rwi_list); + spin_lock_init(&adapter->rwi_lock); ++ init_completion(&adapter->init_done); + adapter->resetting = false; + + adapter->mac_change_pending = false; +-- +2.19.1 + diff --git a/queue-5.0/ip6_tunnel-match-to-arphrd_tunnel6-for-dev-type.patch b/queue-5.0/ip6_tunnel-match-to-arphrd_tunnel6-for-dev-type.patch new file mode 100644 index 0000000000..e8f81a3c3c --- /dev/null +++ b/queue-5.0/ip6_tunnel-match-to-arphrd_tunnel6-for-dev-type.patch @@ -0,0 +1,53 @@ +From edee6ed263cd8fa074d7c0a832968b251a027f2d Mon Sep 17 00:00:00 2001 +From: Sheena Mira-ato +Date: Mon, 1 Apr 2019 13:04:42 +1300 +Subject: ip6_tunnel: Match to ARPHRD_TUNNEL6 for dev type + +[ Upstream commit b2e54b09a3d29c4db883b920274ca8dca4d9f04d ] + +The device type for ip6 tunnels is set to +ARPHRD_TUNNEL6. However, the ip4ip6_err function +is expecting the device type of the tunnel to be +ARPHRD_TUNNEL. Since the device types do not +match, the function exits and the ICMP error +packet is not sent to the originating host. Note +that the device type for IPv4 tunnels is set to +ARPHRD_TUNNEL. + +Fix is to expect a tunnel device type of +ARPHRD_TUNNEL6 instead. Now the tunnel device +type matches and the ICMP error packet is sent +to the originating host. + +Signed-off-by: Sheena Mira-ato +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6_tunnel.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c +index 0c6403cf8b52..ade1390c6348 100644 +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -627,7 +627,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, + eiph->daddr, eiph->saddr, 0, 0, + IPPROTO_IPIP, RT_TOS(eiph->tos), 0); +- if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { ++ if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) { + if (!IS_ERR(rt)) + ip_rt_put(rt); + goto out; +@@ -636,7 +636,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + } else { + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, + skb2->dev) || +- skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) ++ skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) + goto out; + } + +-- +2.19.1 + diff --git a/queue-5.0/ipv6-fix-dangling-pointer-when-ipv6-fragment.patch b/queue-5.0/ipv6-fix-dangling-pointer-when-ipv6-fragment.patch new file mode 100644 index 0000000000..7bb9eaa4e4 --- /dev/null +++ b/queue-5.0/ipv6-fix-dangling-pointer-when-ipv6-fragment.patch @@ -0,0 +1,62 @@ +From 5b59843e1b2d043667748e6fea8edb7302548a63 Mon Sep 17 00:00:00 2001 +From: Junwei Hu +Date: Tue, 2 Apr 2019 19:38:04 +0800 +Subject: ipv6: Fix dangling pointer when ipv6 fragment + +[ Upstream commit ef0efcd3bd3fd0589732b67fb586ffd3c8705806 ] + +At the beginning of ip6_fragment func, the prevhdr pointer is +obtained in the ip6_find_1stfragopt func. +However, all the pointers pointing into skb header may change +when calling skb_checksum_help func with +skb->ip_summed = CHECKSUM_PARTIAL condition. +The prevhdr pointe will be dangling if it is not reloaded after +calling __skb_linearize func in skb_checksum_help func. + +Here, I add a variable, nexthdr_offset, to evaluate the offset, +which does not changes even after calling __skb_linearize func. + +Fixes: 405c92f7a541 ("ipv6: add defensive check for CHECKSUM_PARTIAL skbs in ip_fragment") +Signed-off-by: Junwei Hu +Reported-by: Wenhao Zhang +Reported-by: syzbot+e8ce541d095e486074fc@syzkaller.appspotmail.com +Reviewed-by: Zhiqiang Liu +Acked-by: Martin KaFai Lau +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6_output.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index 5f9fa0302b5a..e71227390bec 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -595,7 +595,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + inet6_sk(skb->sk) : NULL; + struct ipv6hdr *tmp_hdr; + struct frag_hdr *fh; +- unsigned int mtu, hlen, left, len; ++ unsigned int mtu, hlen, left, len, nexthdr_offset; + int hroom, troom; + __be32 frag_id; + int ptr, offset = 0, err = 0; +@@ -606,6 +606,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + goto fail; + hlen = err; + nexthdr = *prevhdr; ++ nexthdr_offset = prevhdr - skb_network_header(skb); + + mtu = ip6_skb_dst_mtu(skb); + +@@ -640,6 +641,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + (err = skb_checksum_help(skb))) + goto fail; + ++ prevhdr = skb_network_header(skb) + nexthdr_offset; + hroom = LL_RESERVED_SPACE(rt->dst.dev); + if (skb_has_frag_list(skb)) { + unsigned int first_len = skb_pagelen(skb); +-- +2.19.1 + diff --git a/queue-5.0/ipv6-sit-reset-ip-header-pointer-in-ipip6_rcv.patch b/queue-5.0/ipv6-sit-reset-ip-header-pointer-in-ipip6_rcv.patch new file mode 100644 index 0000000000..1337d3a096 --- /dev/null +++ b/queue-5.0/ipv6-sit-reset-ip-header-pointer-in-ipip6_rcv.patch @@ -0,0 +1,119 @@ +From 088d2a494c203ef1a237d1bacc79fb120cdb5a5a Mon Sep 17 00:00:00 2001 +From: Lorenzo Bianconi +Date: Thu, 4 Apr 2019 16:37:53 +0200 +Subject: ipv6: sit: reset ip header pointer in ipip6_rcv + +[ Upstream commit bb9bd814ebf04f579be466ba61fc922625508807 ] + +ipip6 tunnels run iptunnel_pull_header on received skbs. 
This can +determine the following use-after-free accessing iph pointer since +the packet will be 'uncloned' running pskb_expand_head if it is a +cloned gso skb (e.g if the packet has been sent though a veth device) + +[ 706.369655] BUG: KASAN: use-after-free in ipip6_rcv+0x1678/0x16e0 [sit] +[ 706.449056] Read of size 1 at addr ffffe01b6bd855f5 by task ksoftirqd/1/= +[ 706.669494] Hardware name: HPE ProLiant m400 Server/ProLiant m400 Server, BIOS U02 08/19/2016 +[ 706.771839] Call trace: +[ 706.801159] dump_backtrace+0x0/0x2f8 +[ 706.845079] show_stack+0x24/0x30 +[ 706.884833] dump_stack+0xe0/0x11c +[ 706.925629] print_address_description+0x68/0x260 +[ 706.982070] kasan_report+0x178/0x340 +[ 707.025995] __asan_report_load1_noabort+0x30/0x40 +[ 707.083481] ipip6_rcv+0x1678/0x16e0 [sit] +[ 707.132623] tunnel64_rcv+0xd4/0x200 [tunnel4] +[ 707.185940] ip_local_deliver_finish+0x3b8/0x988 +[ 707.241338] ip_local_deliver+0x144/0x470 +[ 707.289436] ip_rcv_finish+0x43c/0x14b0 +[ 707.335447] ip_rcv+0x628/0x1138 +[ 707.374151] __netif_receive_skb_core+0x1670/0x2600 +[ 707.432680] __netif_receive_skb+0x28/0x190 +[ 707.482859] process_backlog+0x1d0/0x610 +[ 707.529913] net_rx_action+0x37c/0xf68 +[ 707.574882] __do_softirq+0x288/0x1018 +[ 707.619852] run_ksoftirqd+0x70/0xa8 +[ 707.662734] smpboot_thread_fn+0x3a4/0x9e8 +[ 707.711875] kthread+0x2c8/0x350 +[ 707.750583] ret_from_fork+0x10/0x18 + +[ 707.811302] Allocated by task 16982: +[ 707.854182] kasan_kmalloc.part.1+0x40/0x108 +[ 707.905405] kasan_kmalloc+0xb4/0xc8 +[ 707.948291] kasan_slab_alloc+0x14/0x20 +[ 707.994309] __kmalloc_node_track_caller+0x158/0x5e0 +[ 708.053902] __kmalloc_reserve.isra.8+0x54/0xe0 +[ 708.108280] __alloc_skb+0xd8/0x400 +[ 708.150139] sk_stream_alloc_skb+0xa4/0x638 +[ 708.200346] tcp_sendmsg_locked+0x818/0x2b90 +[ 708.251581] tcp_sendmsg+0x40/0x60 +[ 708.292376] inet_sendmsg+0xf0/0x520 +[ 708.335259] sock_sendmsg+0xac/0xf8 +[ 708.377096] sock_write_iter+0x1c0/0x2c0 +[ 708.424154] new_sync_write+0x358/0x4a8 +[ 708.470162] __vfs_write+0xc4/0xf8 +[ 708.510950] vfs_write+0x12c/0x3d0 +[ 708.551739] ksys_write+0xcc/0x178 +[ 708.592533] __arm64_sys_write+0x70/0xa0 +[ 708.639593] el0_svc_handler+0x13c/0x298 +[ 708.686646] el0_svc+0x8/0xc + +[ 708.739019] Freed by task 17: +[ 708.774597] __kasan_slab_free+0x114/0x228 +[ 708.823736] kasan_slab_free+0x10/0x18 +[ 708.868703] kfree+0x100/0x3d8 +[ 708.905320] skb_free_head+0x7c/0x98 +[ 708.948204] skb_release_data+0x320/0x490 +[ 708.996301] pskb_expand_head+0x60c/0x970 +[ 709.044399] __iptunnel_pull_header+0x3b8/0x5d0 +[ 709.098770] ipip6_rcv+0x41c/0x16e0 [sit] +[ 709.146873] tunnel64_rcv+0xd4/0x200 [tunnel4] +[ 709.200195] ip_local_deliver_finish+0x3b8/0x988 +[ 709.255596] ip_local_deliver+0x144/0x470 +[ 709.303692] ip_rcv_finish+0x43c/0x14b0 +[ 709.349705] ip_rcv+0x628/0x1138 +[ 709.388413] __netif_receive_skb_core+0x1670/0x2600 +[ 709.446943] __netif_receive_skb+0x28/0x190 +[ 709.497120] process_backlog+0x1d0/0x610 +[ 709.544169] net_rx_action+0x37c/0xf68 +[ 709.589131] __do_softirq+0x288/0x1018 + +[ 709.651938] The buggy address belongs to the object at ffffe01b6bd85580 + which belongs to the cache kmalloc-1024 of size 1024 +[ 709.804356] The buggy address is located 117 bytes inside of + 1024-byte region [ffffe01b6bd85580, ffffe01b6bd85980) +[ 709.946340] The buggy address belongs to the page: +[ 710.003824] page:ffff7ff806daf600 count:1 mapcount:0 mapping:ffffe01c4001f600 index:0x0 +[ 710.099914] flags: 0xfffff8000000100(slab) +[ 710.149059] raw: 0fffff8000000100 dead000000000100 
dead000000000200 ffffe01c4001f600 +[ 710.242011] raw: 0000000000000000 0000000000380038 00000001ffffffff 0000000000000000 +[ 710.334966] page dumped because: kasan: bad access detected + +Fix it resetting iph pointer after iptunnel_pull_header + +Fixes: a09a4c8dd1ec ("tunnels: Remove encapsulation offloads on decap") +Tested-by: Jianlin Shi +Signed-off-by: Lorenzo Bianconi +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/sit.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c +index 07e21a82ce4c..b2109b74857d 100644 +--- a/net/ipv6/sit.c ++++ b/net/ipv6/sit.c +@@ -669,6 +669,10 @@ static int ipip6_rcv(struct sk_buff *skb) + !net_eq(tunnel->net, dev_net(tunnel->dev)))) + goto out; + ++ /* skb can be uncloned in iptunnel_pull_header, so ++ * old iph is no longer valid ++ */ ++ iph = (const struct iphdr *)skb_mac_header(skb); + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) +-- +2.19.1 + diff --git a/queue-5.0/kcm-switch-order-of-device-registration-to-fix-a-cra.patch b/queue-5.0/kcm-switch-order-of-device-registration-to-fix-a-cra.patch new file mode 100644 index 0000000000..2293717527 --- /dev/null +++ b/queue-5.0/kcm-switch-order-of-device-registration-to-fix-a-cra.patch @@ -0,0 +1,94 @@ +From 47ea0519a55b1bb556e972fb58161439e6ad39c6 Mon Sep 17 00:00:00 2001 +From: Jiri Slaby +Date: Fri, 29 Mar 2019 12:19:46 +0100 +Subject: kcm: switch order of device registration to fix a crash + +[ Upstream commit 3c446e6f96997f2a95bf0037ef463802162d2323 ] + +When kcm is loaded while many processes try to create a KCM socket, a +crash occurs: + BUG: unable to handle kernel NULL pointer dereference at 000000000000000e + IP: mutex_lock+0x27/0x40 kernel/locking/mutex.c:240 + PGD 8000000016ef2067 P4D 8000000016ef2067 PUD 3d6e9067 PMD 0 + Oops: 0002 [#1] SMP KASAN PTI + CPU: 0 PID: 7005 Comm: syz-executor.5 Not tainted 4.12.14-396-default #1 SLE15-SP1 (unreleased) + RIP: 0010:mutex_lock+0x27/0x40 kernel/locking/mutex.c:240 + RSP: 0018:ffff88000d487a00 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: 000000000000000e RCX: 1ffff100082b0719 + ... + CR2: 000000000000000e CR3: 000000004b1bc003 CR4: 0000000000060ef0 + Call Trace: + kcm_create+0x600/0xbf0 [kcm] + __sock_create+0x324/0x750 net/socket.c:1272 + ... + +This is due to race between sock_create and unfinished +register_pernet_device. kcm_create tries to do "net_generic(net, +kcm_net_id)". but kcm_net_id is not initialized yet. + +So switch the order of the two to close the race. + +This can be reproduced with mutiple processes doing socket(PF_KCM, ...) +and one process doing module removal. + +Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") +Reviewed-by: Michal Kubecek +Signed-off-by: Jiri Slaby +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/kcm/kcmsock.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c +index 571d824e4e24..b919db02c7f9 100644 +--- a/net/kcm/kcmsock.c ++++ b/net/kcm/kcmsock.c +@@ -2054,14 +2054,14 @@ static int __init kcm_init(void) + if (err) + goto fail; + +- err = sock_register(&kcm_family_ops); +- if (err) +- goto sock_register_fail; +- + err = register_pernet_device(&kcm_net_ops); + if (err) + goto net_ops_fail; + ++ err = sock_register(&kcm_family_ops); ++ if (err) ++ goto sock_register_fail; ++ + err = kcm_proc_init(); + if (err) + goto proc_init_fail; +@@ -2069,12 +2069,12 @@ static int __init kcm_init(void) + return 0; + + proc_init_fail: +- unregister_pernet_device(&kcm_net_ops); +- +-net_ops_fail: + sock_unregister(PF_KCM); + + sock_register_fail: ++ unregister_pernet_device(&kcm_net_ops); ++ ++net_ops_fail: + proto_unregister(&kcm_proto); + + fail: +@@ -2090,8 +2090,8 @@ static int __init kcm_init(void) + static void __exit kcm_exit(void) + { + kcm_proc_exit(); +- unregister_pernet_device(&kcm_net_ops); + sock_unregister(PF_KCM); ++ unregister_pernet_device(&kcm_net_ops); + proto_unregister(&kcm_proto); + destroy_workqueue(kcm_wq); + +-- +2.19.1 + diff --git a/queue-5.0/net-bridge-always-clear-mcast-matching-struct-on-rep.patch b/queue-5.0/net-bridge-always-clear-mcast-matching-struct-on-rep.patch new file mode 100644 index 0000000000..36edb8df46 --- /dev/null +++ b/queue-5.0/net-bridge-always-clear-mcast-matching-struct-on-rep.patch @@ -0,0 +1,78 @@ +From e395d7de168305f53547cc28981aa9587bda8c87 Mon Sep 17 00:00:00 2001 +From: Nikolay Aleksandrov +Date: Wed, 3 Apr 2019 23:27:24 +0300 +Subject: net: bridge: always clear mcast matching struct on reports and leaves + +[ Upstream commit 1515a63fc413f160d20574ab0894e7f1020c7be2 ] + +We need to be careful and always zero the whole br_ip struct when it is +used for matching since the rhashtable change. This patch fixes all the +places which didn't properly clear it which in turn might've caused +mismatches. + +Thanks for the great bug report with reproducing steps and bisection. + +Steps to reproduce (from the bug report): +ip link add br0 type bridge mcast_querier 1 +ip link set br0 up + +ip link add v2 type veth peer name v3 +ip link set v2 master br0 +ip link set v2 up +ip link set v3 up +ip addr add 3.0.0.2/24 dev v3 + +ip netns add test +ip link add v1 type veth peer name v1 netns test +ip link set v1 master br0 +ip link set v1 up +ip -n test link set v1 up +ip -n test addr add 3.0.0.1/24 dev v1 + +# Multicast receiver +ip netns exec test socat +UDP4-RECVFROM:5588,ip-add-membership=224.224.224.224:3.0.0.1,fork - + +# Multicast sender +echo hello | nc -u -s 3.0.0.2 224.224.224.224 5588 + +Reported-by: liam.mcbirnie@boeing.com +Fixes: 19e3a9c90c53 ("net: bridge: convert multicast to generic rhashtable") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/bridge/br_multicast.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c +index ac92b2eb32b1..e4777614a8a0 100644 +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -599,6 +599,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, + if (ipv4_is_local_multicast(group)) + return 0; + ++ memset(&br_group, 0, sizeof(br_group)); + br_group.u.ip4 = group; + br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; +@@ -1489,6 +1490,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, + + own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + ++ memset(&br_group, 0, sizeof(br_group)); + br_group.u.ip4 = group; + br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; +@@ -1512,6 +1514,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, + + own_query = port ? &port->ip6_own_query : &br->ip6_own_query; + ++ memset(&br_group, 0, sizeof(br_group)); + br_group.u.ip6 = *group; + br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; +-- +2.19.1 + diff --git a/queue-5.0/net-core-netif_receive_skb_list-unlist-skb-before-pa.patch b/queue-5.0/net-core-netif_receive_skb_list-unlist-skb-before-pa.patch new file mode 100644 index 0000000000..9220be582a --- /dev/null +++ b/queue-5.0/net-core-netif_receive_skb_list-unlist-skb-before-pa.patch @@ -0,0 +1,95 @@ +From 6d178e814131df4f8df387fcd72be1d47beacce5 Mon Sep 17 00:00:00 2001 +From: Alexander Lobakin +Date: Thu, 28 Mar 2019 18:23:04 +0300 +Subject: net: core: netif_receive_skb_list: unlist skb before passing to + pt->func + +[ Upstream commit 9a5a90d167b0e5fe3d47af16b68fd09ce64085cd ] + +__netif_receive_skb_list_ptype() leaves skb->next poisoned before passing +it to pt_prev->func handler, what may produce (in certain cases, e.g. DSA +setup) crashes like: + +[ 88.606777] CPU 0 Unable to handle kernel paging request at virtual address 0000000e, epc == 80687078, ra == 8052cc7c +[ 88.618666] Oops[#1]: +[ 88.621196] CPU: 0 PID: 0 Comm: swapper Not tainted 5.1.0-rc2-dlink-00206-g4192a172-dirty #1473 +[ 88.630885] $ 0 : 00000000 10000400 00000002 864d7850 +[ 88.636709] $ 4 : 87c0ddf0 864d7800 87c0ddf0 00000000 +[ 88.642526] $ 8 : 00000000 49600000 00000001 00000001 +[ 88.648342] $12 : 00000000 c288617b dadbee27 25d17c41 +[ 88.654159] $16 : 87c0ddf0 85cff080 80790000 fffffffd +[ 88.659975] $20 : 80797b20 ffffffff 00000001 864d7800 +[ 88.665793] $24 : 00000000 8011e658 +[ 88.671609] $28 : 80790000 87c0dbc0 87cabf00 8052cc7c +[ 88.677427] Hi : 00000003 +[ 88.680622] Lo : 7b5b4220 +[ 88.683840] epc : 80687078 vlan_dev_hard_start_xmit+0x1c/0x1a0 +[ 88.690532] ra : 8052cc7c dev_hard_start_xmit+0xac/0x188 +[ 88.696734] Status: 10000404 IEp +[ 88.700422] Cause : 50000008 (ExcCode 02) +[ 88.704874] BadVA : 0000000e +[ 88.708069] PrId : 0001a120 (MIPS interAptiv (multi)) +[ 88.713005] Modules linked in: +[ 88.716407] Process swapper (pid: 0, threadinfo=(ptrval), task=(ptrval), tls=00000000) +[ 88.725219] Stack : 85f61c28 00000000 0000000e 80780000 87c0ddf0 85cff080 80790000 8052cc7c +[ 88.734529] 87cabf00 00000000 00000001 85f5fb40 807b0000 864d7850 87cabf00 807d0000 +[ 88.743839] 864d7800 8655f600 00000000 85cff080 87c1c000 0000006a 00000000 8052d96c +[ 88.753149] 807a0000 8057adb8 87c0dcc8 87c0dc50 85cfff08 00000558 87cabf00 85f58c50 +[ 88.762460] 00000002 85f58c00 864d7800 80543308 fffffff4 00000001 85f58c00 864d7800 +[ 88.771770] ... 
+[ 88.774483] Call Trace: +[ 88.777199] [<80687078>] vlan_dev_hard_start_xmit+0x1c/0x1a0 +[ 88.783504] [<8052cc7c>] dev_hard_start_xmit+0xac/0x188 +[ 88.789326] [<8052d96c>] __dev_queue_xmit+0x6e8/0x7d4 +[ 88.794955] [<805a8640>] ip_finish_output2+0x238/0x4d0 +[ 88.800677] [<805ab6a0>] ip_output+0xc8/0x140 +[ 88.805526] [<805a68f4>] ip_forward+0x364/0x560 +[ 88.810567] [<805a4ff8>] ip_rcv+0x48/0xe4 +[ 88.815030] [<80528d44>] __netif_receive_skb_one_core+0x44/0x58 +[ 88.821635] [<8067f220>] dsa_switch_rcv+0x108/0x1ac +[ 88.827067] [<80528f80>] __netif_receive_skb_list_core+0x228/0x26c +[ 88.833951] [<8052ed84>] netif_receive_skb_list+0x1d4/0x394 +[ 88.840160] [<80355a88>] lunar_rx_poll+0x38c/0x828 +[ 88.845496] [<8052fa78>] net_rx_action+0x14c/0x3cc +[ 88.850835] [<806ad300>] __do_softirq+0x178/0x338 +[ 88.856077] [<8012a2d4>] irq_exit+0xbc/0x100 +[ 88.860846] [<802f8b70>] plat_irq_dispatch+0xc0/0x144 +[ 88.866477] [<80105974>] handle_int+0x14c/0x158 +[ 88.871516] [<806acfb0>] r4k_wait+0x30/0x40 +[ 88.876462] Code: afb10014 8c8200a0 00803025 <9443000c> 94a20468 00000000 10620042 00a08025 9605046a +[ 88.887332] +[ 88.888982] ---[ end trace eb863d007da11cf1 ]--- +[ 88.894122] Kernel panic - not syncing: Fatal exception in interrupt +[ 88.901202] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- + +Fix this by pulling skb off the sublist and zeroing skb->next pointer +before calling ptype callback. + +Fixes: 88eb1944e18c ("net: core: propagate SKB lists through packet_type lookup") +Reviewed-by: Edward Cree +Signed-off-by: Alexander Lobakin +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/core/dev.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/core/dev.c b/net/core/dev.c +index 5d03889502eb..12824e007e06 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -5014,8 +5014,10 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head, + if (pt_prev->list_func != NULL) + pt_prev->list_func(head, pt_prev, orig_dev); + else +- list_for_each_entry_safe(skb, next, head, list) ++ list_for_each_entry_safe(skb, next, head, list) { ++ skb_list_del_init(skb); + pt_prev->func(skb, skb->dev, pt_prev, orig_dev); ++ } + } + + static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc) +-- +2.19.1 + diff --git a/queue-5.0/net-ethtool-not-call-vzalloc-for-zero-sized-memory-r.patch b/queue-5.0/net-ethtool-not-call-vzalloc-for-zero-sized-memory-r.patch new file mode 100644 index 0000000000..371e9fb160 --- /dev/null +++ b/queue-5.0/net-ethtool-not-call-vzalloc-for-zero-sized-memory-r.patch @@ -0,0 +1,104 @@ +From 02b3cbc135549e2ad979efa8e30bd457afd82c5f Mon Sep 17 00:00:00 2001 +From: Li RongQing +Date: Fri, 29 Mar 2019 09:18:02 +0800 +Subject: net: ethtool: not call vzalloc for zero sized memory request + +[ Upstream commit 3d8830266ffc28c16032b859e38a0252e014b631 ] + +NULL or ZERO_SIZE_PTR will be returned for zero sized memory +request, and derefencing them will lead to a segfault + +so it is unnecessory to call vzalloc for zero sized memory +request and not call functions which maybe derefence the +NULL allocated memory + +this also fixes a possible memory leak if phy_ethtool_get_stats +returns error, memory should be freed before exit + +Signed-off-by: Li RongQing +Reviewed-by: Wang Li +Reviewed-by: Michal Kubecek +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/ethtool.c | 46 ++++++++++++++++++++++++++++++---------------- + 1 file changed, 30 insertions(+), 16 deletions(-) + +diff --git a/net/core/ethtool.c b/net/core/ethtool.c +index 158264f7cfaf..3a7f19a61768 100644 +--- a/net/core/ethtool.c ++++ b/net/core/ethtool.c +@@ -1794,11 +1794,16 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) + WARN_ON_ONCE(!ret); + + gstrings.len = ret; +- data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); +- if (gstrings.len && !data) +- return -ENOMEM; + +- __ethtool_get_strings(dev, gstrings.string_set, data); ++ if (gstrings.len) { ++ data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); ++ if (!data) ++ return -ENOMEM; ++ ++ __ethtool_get_strings(dev, gstrings.string_set, data); ++ } else { ++ data = NULL; ++ } + + ret = -EFAULT; + if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) +@@ -1894,11 +1899,15 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) + return -EFAULT; + + stats.n_stats = n_stats; +- data = vzalloc(array_size(n_stats, sizeof(u64))); +- if (n_stats && !data) +- return -ENOMEM; + +- ops->get_ethtool_stats(dev, &stats, data); ++ if (n_stats) { ++ data = vzalloc(array_size(n_stats, sizeof(u64))); ++ if (!data) ++ return -ENOMEM; ++ ops->get_ethtool_stats(dev, &stats, data); ++ } else { ++ data = NULL; ++ } + + ret = -EFAULT; + if (copy_to_user(useraddr, &stats, sizeof(stats))) +@@ -1938,16 +1947,21 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) + return -EFAULT; + + stats.n_stats = n_stats; +- data = vzalloc(array_size(n_stats, sizeof(u64))); +- if (n_stats && !data) +- return -ENOMEM; + +- if (dev->phydev && !ops->get_ethtool_phy_stats) { +- ret = phy_ethtool_get_stats(dev->phydev, &stats, data); +- if (ret < 0) +- return ret; ++ if (n_stats) { ++ data = vzalloc(array_size(n_stats, sizeof(u64))); ++ if (!data) ++ return -ENOMEM; ++ ++ if (dev->phydev && !ops->get_ethtool_phy_stats) { ++ ret = phy_ethtool_get_stats(dev->phydev, &stats, data); ++ if (ret < 0) ++ goto out; ++ } else { ++ ops->get_ethtool_phy_stats(dev, &stats, data); ++ } + } else { +- ops->get_ethtool_phy_stats(dev, &stats, data); ++ data = NULL; + } + + ret = -EFAULT; +-- +2.19.1 + diff --git a/queue-5.0/net-gro-fix-gro-flush-when-receiving-a-gso-packet.patch b/queue-5.0/net-gro-fix-gro-flush-when-receiving-a-gso-packet.patch new file mode 100644 index 0000000000..3f8ded9fba --- /dev/null +++ b/queue-5.0/net-gro-fix-gro-flush-when-receiving-a-gso-packet.patch @@ -0,0 +1,42 @@ +From 9f79e021de58e4682c44cef09f22ab02d4bef741 Mon Sep 17 00:00:00 2001 +From: Steffen Klassert +Date: Tue, 2 Apr 2019 08:16:03 +0200 +Subject: net-gro: Fix GRO flush when receiving a GSO packet. + +[ Upstream commit 0ab03f353d3613ea49d1f924faf98559003670a8 ] + +Currently we may merge incorrectly a received GSO packet +or a packet with frag_list into a packet sitting in the +gro_hash list. skb_segment() may crash case because +the assumptions on the skb layout are not met. +The correct behaviour would be to flush the packet in the +gro_hash list and send the received GSO packet directly +afterwards. Commit d61d072e87c8e ("net-gro: avoid reorders") +sets NAPI_GRO_CB(skb)->flush in this case, but this is not +checked before merging. This patch makes sure to check this +flag and to not merge in that case. + +Fixes: d61d072e87c8e ("net-gro: avoid reorders") +Signed-off-by: Steffen Klassert +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/skbuff.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 2415d9cb9b89..ef2cd5712098 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3801,7 +3801,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) + unsigned int delta_truesize; + struct sk_buff *lp; + +- if (unlikely(p->len + len >= 65536)) ++ if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) + return -E2BIG; + + lp = NAPI_GRO_CB(p)->last; +-- +2.19.1 + diff --git a/queue-5.0/net-ip6_gre-fix-possible-use-after-free-in-ip6erspan.patch b/queue-5.0/net-ip6_gre-fix-possible-use-after-free-in-ip6erspan.patch new file mode 100644 index 0000000000..5b5401dcbe --- /dev/null +++ b/queue-5.0/net-ip6_gre-fix-possible-use-after-free-in-ip6erspan.patch @@ -0,0 +1,88 @@ +From 0bd961d3bf9f4346ed4aa1a574d608a768bda63f Mon Sep 17 00:00:00 2001 +From: Lorenzo Bianconi +Date: Sat, 6 Apr 2019 17:16:53 +0200 +Subject: net: ip6_gre: fix possible use-after-free in ip6erspan_rcv + +[ Upstream commit 2a3cabae4536edbcb21d344e7aa8be7a584d2afb ] + +erspan_v6 tunnels run __iptunnel_pull_header on received skbs to remove +erspan header. This can determine a possible use-after-free accessing +pkt_md pointer in ip6erspan_rcv since the packet will be 'uncloned' +running pskb_expand_head if it is a cloned gso skb (e.g if the packet has +been sent though a veth device). Fix it resetting pkt_md pointer after +__iptunnel_pull_header + +Fixes: 1d7e2ed22f8d ("net: erspan: refactor existing erspan code") +Signed-off-by: Lorenzo Bianconi +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6_gre.c | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c +index 26f25b6e2833..438f1a5fd19a 100644 +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -524,11 +524,10 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) + return PACKET_REJECT; + } + +-static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, +- struct tnl_ptk_info *tpi) ++static int ip6erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, ++ int gre_hdr_len) + { + struct erspan_base_hdr *ershdr; +- struct erspan_metadata *pkt_md; + const struct ipv6hdr *ipv6h; + struct erspan_md2 *md2; + struct ip6_tnl *tunnel; +@@ -547,18 +546,16 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, + if (unlikely(!pskb_may_pull(skb, len))) + return PACKET_REJECT; + +- ershdr = (struct erspan_base_hdr *)skb->data; +- pkt_md = (struct erspan_metadata *)(ershdr + 1); +- + if (__iptunnel_pull_header(skb, len, + htons(ETH_P_TEB), + false, false) < 0) + return PACKET_REJECT; + + if (tunnel->parms.collect_md) { ++ struct erspan_metadata *pkt_md, *md; + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; +- struct erspan_metadata *md; ++ unsigned char *gh; + __be64 tun_id; + __be16 flags; + +@@ -571,6 +568,14 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, + if (!tun_dst) + return PACKET_REJECT; + ++ /* skb can be uncloned in __iptunnel_pull_header, so ++ * old pkt_md is no longer valid and we need to reset ++ * it ++ */ ++ gh = skb_network_header(skb) + ++ skb_network_header_len(skb); ++ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + ++ sizeof(*ershdr)); + info = &tun_dst->u.tun_info; + md = ip_tunnel_info_opts(info); + md->version = ver; +@@ -607,7 +612,7 @@ static int gre_rcv(struct sk_buff *skb) + + 
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || + tpi.proto == htons(ETH_P_ERSPAN2))) { +- if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) ++ if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) + return 0; + goto out; + } +-- +2.19.1 + diff --git a/queue-5.0/net-ip_gre-fix-possible-use-after-free-in-erspan_rcv.patch b/queue-5.0/net-ip_gre-fix-possible-use-after-free-in-erspan_rcv.patch new file mode 100644 index 0000000000..1c34cbc0ca --- /dev/null +++ b/queue-5.0/net-ip_gre-fix-possible-use-after-free-in-erspan_rcv.patch @@ -0,0 +1,73 @@ +From bf8a9848c2a01e86b99c72a0a490eda1ece7ab06 Mon Sep 17 00:00:00 2001 +From: Lorenzo Bianconi +Date: Sat, 6 Apr 2019 17:16:52 +0200 +Subject: net: ip_gre: fix possible use-after-free in erspan_rcv + +[ Upstream commit 492b67e28ee5f2a2522fb72e3d3bcb990e461514 ] + +erspan tunnels run __iptunnel_pull_header on received skbs to remove +gre and erspan headers. This can determine a possible use-after-free +accessing pkt_md pointer in erspan_rcv since the packet will be 'uncloned' +running pskb_expand_head if it is a cloned gso skb (e.g if the packet has +been sent though a veth device). Fix it resetting pkt_md pointer after +__iptunnel_pull_header + +Fixes: 1d7e2ed22f8d ("net: erspan: refactor existing erspan code") +Signed-off-by: Lorenzo Bianconi +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/ip_gre.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c +index 6ae89f2b541b..2d5734079e6b 100644 +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, + struct net *net = dev_net(skb->dev); + struct metadata_dst *tun_dst = NULL; + struct erspan_base_hdr *ershdr; +- struct erspan_metadata *pkt_md; + struct ip_tunnel_net *itn; + struct ip_tunnel *tunnel; + const struct iphdr *iph; +@@ -282,9 +281,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, + if (unlikely(!pskb_may_pull(skb, len))) + return PACKET_REJECT; + +- ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); +- pkt_md = (struct erspan_metadata *)(ershdr + 1); +- + if (__iptunnel_pull_header(skb, + len, + htons(ETH_P_TEB), +@@ -292,8 +288,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, + goto drop; + + if (tunnel->collect_md) { ++ struct erspan_metadata *pkt_md, *md; + struct ip_tunnel_info *info; +- struct erspan_metadata *md; ++ unsigned char *gh; + __be64 tun_id; + __be16 flags; + +@@ -306,6 +303,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, + if (!tun_dst) + return PACKET_REJECT; + ++ /* skb can be uncloned in __iptunnel_pull_header, so ++ * old pkt_md is no longer valid and we need to reset ++ * it ++ */ ++ gh = skb_network_header(skb) + ++ skb_network_header_len(skb); ++ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + ++ sizeof(*ershdr)); + md = ip_tunnel_info_opts(&tun_dst->u.tun_info); + md->version = ver; + md2 = &md->u.md2; +-- +2.19.1 + diff --git a/queue-5.0/net-mlx5-add-a-missing-check-on-idr_find-free-buf.patch b/queue-5.0/net-mlx5-add-a-missing-check-on-idr_find-free-buf.patch new file mode 100644 index 0000000000..c03998f3ff --- /dev/null +++ b/queue-5.0/net-mlx5-add-a-missing-check-on-idr_find-free-buf.patch @@ -0,0 +1,63 @@ +From f1c2fbe6f46a9871d21bfa0a7af6e5a5da017707 Mon Sep 17 00:00:00 2001 +From: Aditya Pakki +Date: Tue, 19 Mar 2019 16:42:40 -0500 +Subject: net: mlx5: Add a missing check on 
idr_find, free buf + +[ Upstream commit 8e949363f017e2011464812a714fb29710fb95b4 ] + +idr_find() can return a NULL value to 'flow' which is used without a +check. The patch adds a check to avoid potential NULL pointer dereference. + +In case of mlx5_fpga_sbu_conn_sendmsg() failure, free buf allocated +using kzalloc. + +Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") +Signed-off-by: Aditya Pakki +Reviewed-by: Yuval Shaia +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +index 5cf5f2a9d51f..8de64e88c670 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +@@ -217,15 +217,21 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, + void *cmd; + int ret; + ++ rcu_read_lock(); ++ flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); ++ rcu_read_unlock(); ++ ++ if (!flow) { ++ WARN_ONCE(1, "Received NULL pointer for handle\n"); ++ return -EINVAL; ++ } ++ + buf = kzalloc(size, GFP_ATOMIC); + if (!buf) + return -ENOMEM; + + cmd = (buf + 1); + +- rcu_read_lock(); +- flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); +- rcu_read_unlock(); + mlx5_fpga_tls_flow_to_cmd(flow, cmd); + + MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); +@@ -238,6 +244,8 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, + buf->complete = mlx_tls_kfree_complete; + + ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf); ++ if (ret < 0) ++ kfree(buf); + + return ret; + } +-- +2.19.1 + diff --git a/queue-5.0/net-mlx5-decrease-default-mr-cache-size.patch b/queue-5.0/net-mlx5-decrease-default-mr-cache-size.patch new file mode 100644 index 0000000000..6304e7bcfd --- /dev/null +++ b/queue-5.0/net-mlx5-decrease-default-mr-cache-size.patch @@ -0,0 +1,60 @@ +From 62347b0351a73fae9e23ebf6d5e24211c254fc3a Mon Sep 17 00:00:00 2001 +From: Artemy Kovalyov +Date: Tue, 19 Mar 2019 11:24:38 +0200 +Subject: net/mlx5: Decrease default mr cache size + +[ Upstream commit e8b26b2135dedc0284490bfeac06dfc4418d0105 ] + +Delete initialization of high order entries in mr cache to decrease initial +memory footprint. When required, the administrator can populate the +entries with memory keys via the /sys interface. + +This approach is very helpful to significantly reduce the per HW function +memory footprint in virtualization environments such as SRIOV. 
+ +Fixes: 9603b61de1ee ("mlx5: Move pci device handling from mlx5_ib to mlx5_core") +Signed-off-by: Artemy Kovalyov +Signed-off-by: Moni Shoua +Signed-off-by: Leon Romanovsky +Reported-by: Shalom Toledo +Acked-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/main.c | 20 ------------------- + 1 file changed, 20 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index be81b319b0dc..694edd899322 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -163,26 +163,6 @@ static struct mlx5_profile profile[] = { + .size = 8, + .limit = 4 + }, +- .mr_cache[16] = { +- .size = 8, +- .limit = 4 +- }, +- .mr_cache[17] = { +- .size = 8, +- .limit = 4 +- }, +- .mr_cache[18] = { +- .size = 8, +- .limit = 4 +- }, +- .mr_cache[19] = { +- .size = 4, +- .limit = 2 +- }, +- .mr_cache[20] = { +- .size = 4, +- .limit = 2 +- }, + }, + }; + +-- +2.19.1 + diff --git a/queue-5.0/net-mlx5e-add-a-lock-on-tir-list.patch b/queue-5.0/net-mlx5e-add-a-lock-on-tir-list.patch new file mode 100644 index 0000000000..ec63f6b295 --- /dev/null +++ b/queue-5.0/net-mlx5e-add-a-lock-on-tir-list.patch @@ -0,0 +1,85 @@ +From 44064e39efe00c8efbab5331f7996490134a01b6 Mon Sep 17 00:00:00 2001 +From: Yuval Avnery +Date: Mon, 11 Mar 2019 06:18:24 +0200 +Subject: net/mlx5e: Add a lock on tir list + +[ Upstream commit 80a2a9026b24c6bd34b8d58256973e22270bedec ] + +Refresh tirs is looping over a global list of tirs while netdevs are +adding and removing tirs from that list. That is why a lock is +required. + +Fixes: 724b2aa15126 ("net/mlx5e: TIRs management refactoring") +Signed-off-by: Yuval Avnery +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 7 +++++++ + include/linux/mlx5/driver.h | 2 ++ + 2 files changed, 9 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +index 8100786f6fb5..1539cf3de5dc 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +@@ -45,7 +45,9 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, + if (err) + return err; + ++ mutex_lock(&mdev->mlx5e_res.td.list_lock); + list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); ++ mutex_unlock(&mdev->mlx5e_res.td.list_lock); + + return 0; + } +@@ -53,8 +55,10 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, + void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir) + { ++ mutex_lock(&mdev->mlx5e_res.td.list_lock); + mlx5_core_destroy_tir(mdev, tir->tirn); + list_del(&tir->list); ++ mutex_unlock(&mdev->mlx5e_res.td.list_lock); + } + + static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, +@@ -114,6 +118,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) + } + + INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); ++ mutex_init(&mdev->mlx5e_res.td.list_lock); + + return 0; + +@@ -159,6 +164,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + ++ mutex_lock(&mdev->mlx5e_res.td.list_lock); + list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { + tirn = tir->tirn; + err = mlx5_core_modify_tir(mdev, tirn, in, inlen); +@@ -170,6 +176,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) + kvfree(in); + if (err) + 
netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); ++ mutex_unlock(&mdev->mlx5e_res.td.list_lock); + + return err; + } +diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h +index 54299251d40d..4f001619f854 100644 +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -591,6 +591,8 @@ enum mlx5_pagefault_type_flags { + }; + + struct mlx5_td { ++ /* protects tirs list changes while tirs refresh */ ++ struct mutex list_lock; + struct list_head tirs_list; + u32 tdn; + }; +-- +2.19.1 + diff --git a/queue-5.0/net-mlx5e-fix-error-handling-when-refreshing-tirs.patch b/queue-5.0/net-mlx5e-fix-error-handling-when-refreshing-tirs.patch new file mode 100644 index 0000000000..718579e1a2 --- /dev/null +++ b/queue-5.0/net-mlx5e-fix-error-handling-when-refreshing-tirs.patch @@ -0,0 +1,48 @@ +From 5b45fef2f77945c4d3efd04ead0752a074e0dfd2 Mon Sep 17 00:00:00 2001 +From: Gavi Teitz +Date: Mon, 11 Mar 2019 11:56:34 +0200 +Subject: net/mlx5e: Fix error handling when refreshing TIRs + +[ Upstream commit bc87a0036826a37b43489b029af8143bd07c6cca ] + +Previously, a false positive would be caught if the TIRs list is +empty, since the err value was initialized to -ENOMEM, and was only +updated if a TIR is refreshed. This is resolved by initializing the +err value to zero. + +Fixes: b676f653896a ("net/mlx5e: Refactor refresh TIRs") +Signed-off-by: Gavi Teitz +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +index 3078491cc0d0..8100786f6fb5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +@@ -141,15 +141,17 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) + { + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_tir *tir; +- int err = -ENOMEM; ++ int err = 0; + u32 tirn = 0; + int inlen; + void *in; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = kvzalloc(inlen, GFP_KERNEL); +- if (!in) ++ if (!in) { ++ err = -ENOMEM; + goto out; ++ } + + if (enable_uc_lb) + MLX5_SET(modify_tir_in, in, ctx.self_lb_block, +-- +2.19.1 + diff --git a/queue-5.0/net-mlx5e-update-xoff-formula.patch b/queue-5.0/net-mlx5e-update-xoff-formula.patch new file mode 100644 index 0000000000..b84f37ae0c --- /dev/null +++ b/queue-5.0/net-mlx5e-update-xoff-formula.patch @@ -0,0 +1,49 @@ +From 1c16603954147ebe4b3bb2cb049fbeaa17646e81 Mon Sep 17 00:00:00 2001 +From: Huy Nguyen +Date: Thu, 7 Mar 2019 14:49:50 -0600 +Subject: net/mlx5e: Update xoff formula + +[ Upstream commit 5ec983e924c7978aaec3cf8679ece9436508bb20 ] + +Set minimum speed in xoff threshold formula to 40Gbps + +Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") +Signed-off-by: Huy Nguyen +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +index eac245a93f91..f00de0c987cd 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +@@ -122,7 +122,9 @@ static int port_set_buffer(struct mlx5e_priv *priv, + 
return err; + } + +-/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */ ++/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) ++ * minimum speed value is 40Gbps ++ */ + static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) + { + u32 speed; +@@ -130,10 +132,9 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) + int err; + + err = mlx5e_port_linkspeed(priv->mdev, &speed); +- if (err) { +- mlx5_core_warn(priv->mdev, "cannot get port speed\n"); +- return 0; +- } ++ if (err) ++ speed = SPEED_40000; ++ speed = max_t(u32, speed, SPEED_40000); + + xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; + +-- +2.19.1 + diff --git a/queue-5.0/net-mlx5e-update-xon-formula.patch b/queue-5.0/net-mlx5e-update-xon-formula.patch new file mode 100644 index 0000000000..f63e52fe7b --- /dev/null +++ b/queue-5.0/net-mlx5e-update-xon-formula.patch @@ -0,0 +1,147 @@ +From b5cdbd732afacffe05eed172f68b0cb162da60d3 Mon Sep 17 00:00:00 2001 +From: Huy Nguyen +Date: Thu, 7 Mar 2019 14:07:32 -0600 +Subject: net/mlx5e: Update xon formula + +[ Upstream commit e28408e98bced123038857b6e3c81fa12a2e3e68 ] + +Set xon = xoff - netdev's max_mtu. +netdev's max_mtu will give enough time for the pause frame to +arrive at the sender. + +Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") +Signed-off-by: Huy Nguyen +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/en/port_buffer.c | 28 +++++++++++-------- + 1 file changed, 16 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +index f00de0c987cd..4ab0d030b544 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +@@ -143,7 +143,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) + } + + static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, +- u32 xoff, unsigned int mtu) ++ u32 xoff, unsigned int max_mtu) + { + int i; + +@@ -155,11 +155,12 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, + } + + if (port_buffer->buffer[i].size < +- (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) ++ (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) + return -ENOMEM; + + port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; +- port_buffer->buffer[i].xon = port_buffer->buffer[i].xoff - mtu; ++ port_buffer->buffer[i].xon = ++ port_buffer->buffer[i].xoff - max_mtu; + } + + return 0; +@@ -167,7 +168,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, + + /** + * update_buffer_lossy() +- * mtu: device's MTU ++ * max_mtu: netdev's max_mtu + * pfc_en: current pfc configuration + * buffer: current prio to buffer mapping + * xoff: xoff value +@@ -184,7 +185,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, + * Return 0 if no error. + * Set change to true if buffer configuration is modified. 
+ */ +-static int update_buffer_lossy(unsigned int mtu, ++static int update_buffer_lossy(unsigned int max_mtu, + u8 pfc_en, u8 *buffer, u32 xoff, + struct mlx5e_port_buffer *port_buffer, + bool *change) +@@ -221,7 +222,7 @@ static int update_buffer_lossy(unsigned int mtu, + } + + if (changed) { +- err = update_xoff_threshold(port_buffer, xoff, mtu); ++ err = update_xoff_threshold(port_buffer, xoff, max_mtu); + if (err) + return err; + +@@ -231,6 +232,7 @@ static int update_buffer_lossy(unsigned int mtu, + return 0; + } + ++#define MINIMUM_MAX_MTU 9216 + int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + u32 change, unsigned int mtu, + struct ieee_pfc *pfc, +@@ -242,12 +244,14 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + bool update_prio2buffer = false; + u8 buffer[MLX5E_MAX_PRIORITY]; + bool update_buffer = false; ++ unsigned int max_mtu; + u32 total_used = 0; + u8 curr_pfc_en; + int err; + int i; + + mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); ++ max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); + + err = mlx5e_port_query_buffer(priv, &port_buffer); + if (err) +@@ -255,7 +259,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + + if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { + update_buffer = true; +- err = update_xoff_threshold(&port_buffer, xoff, mtu); ++ err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + if (err) + return err; + } +@@ -265,7 +269,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + if (err) + return err; + +- err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff, ++ err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, + &port_buffer, &update_buffer); + if (err) + return err; +@@ -277,8 +281,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + if (err) + return err; + +- err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff, +- &port_buffer, &update_buffer); ++ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, ++ xoff, &port_buffer, &update_buffer); + if (err) + return err; + } +@@ -302,7 +306,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + return -EINVAL; + + update_buffer = true; +- err = update_xoff_threshold(&port_buffer, xoff, mtu); ++ err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + if (err) + return err; + } +@@ -310,7 +314,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + /* Need to update buffer configuration if xoff value is changed */ + if (!update_buffer && xoff != priv->dcbx.xoff) { + update_buffer = true; +- err = update_xoff_threshold(&port_buffer, xoff, mtu); ++ err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + if (err) + return err; + } +-- +2.19.1 + diff --git a/queue-5.0/net-rds-force-to-destroy-connection-if-t_sock-is-nul.patch b/queue-5.0/net-rds-force-to-destroy-connection-if-t_sock-is-nul.patch new file mode 100644 index 0000000000..691f80a8a5 --- /dev/null +++ b/queue-5.0/net-rds-force-to-destroy-connection-if-t_sock-is-nul.patch @@ -0,0 +1,148 @@ +From ef706a1b3eedca723517d268f69e20c53c42ec37 Mon Sep 17 00:00:00 2001 +From: Mao Wenan +Date: Thu, 28 Mar 2019 17:10:56 +0800 +Subject: net: rds: force to destroy connection if t_sock is NULL in + rds_tcp_kill_sock(). 
+ +[ Upstream commit cb66ddd156203daefb8d71158036b27b0e2caf63 ] + +When it is to cleanup net namespace, rds_tcp_exit_net() will call +rds_tcp_kill_sock(), if t_sock is NULL, it will not call +rds_conn_destroy(), rds_conn_path_destroy() and rds_tcp_conn_free() to free +connection, and the worker cp_conn_w is not stopped, afterwards the net is freed in +net_drop_ns(); While cp_conn_w rds_connect_worker() will call rds_tcp_conn_path_connect() +and reference 'net' which has already been freed. + +In rds_tcp_conn_path_connect(), rds_tcp_set_callbacks() will set t_sock = sock before +sock->ops->connect, but if connect() is failed, it will call +rds_tcp_restore_callbacks() and set t_sock = NULL, if connect is always +failed, rds_connect_worker() will try to reconnect all the time, so +rds_tcp_kill_sock() will never to cancel worker cp_conn_w and free the +connections. + +Therefore, the condition !tc->t_sock is not needed if it is going to do +cleanup_net->rds_tcp_exit_net->rds_tcp_kill_sock, because tc->t_sock is always +NULL, and there is on other path to cancel cp_conn_w and free +connection. So this patch is to fix this. + +rds_tcp_kill_sock(): +... +if (net != c_net || !tc->t_sock) +... +Acked-by: Santosh Shilimkar + +================================================================== +BUG: KASAN: use-after-free in inet_create+0xbcc/0xd28 +net/ipv4/af_inet.c:340 +Read of size 4 at addr ffff8003496a4684 by task kworker/u8:4/3721 + +CPU: 3 PID: 3721 Comm: kworker/u8:4 Not tainted 5.1.0 #11 +Hardware name: linux,dummy-virt (DT) +Workqueue: krdsd rds_connect_worker +Call trace: + dump_backtrace+0x0/0x3c0 arch/arm64/kernel/time.c:53 + show_stack+0x28/0x38 arch/arm64/kernel/traps.c:152 + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x120/0x188 lib/dump_stack.c:113 + print_address_description+0x68/0x278 mm/kasan/report.c:253 + kasan_report_error mm/kasan/report.c:351 [inline] + kasan_report+0x21c/0x348 mm/kasan/report.c:409 + __asan_report_load4_noabort+0x30/0x40 mm/kasan/report.c:429 + inet_create+0xbcc/0xd28 net/ipv4/af_inet.c:340 + __sock_create+0x4f8/0x770 net/socket.c:1276 + sock_create_kern+0x50/0x68 net/socket.c:1322 + rds_tcp_conn_path_connect+0x2b4/0x690 net/rds/tcp_connect.c:114 + rds_connect_worker+0x108/0x1d0 net/rds/threads.c:175 + process_one_work+0x6e8/0x1700 kernel/workqueue.c:2153 + worker_thread+0x3b0/0xdd0 kernel/workqueue.c:2296 + kthread+0x2f0/0x378 kernel/kthread.c:255 + ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:1117 + +Allocated by task 687: + save_stack mm/kasan/kasan.c:448 [inline] + set_track mm/kasan/kasan.c:460 [inline] + kasan_kmalloc+0xd4/0x180 mm/kasan/kasan.c:553 + kasan_slab_alloc+0x14/0x20 mm/kasan/kasan.c:490 + slab_post_alloc_hook mm/slab.h:444 [inline] + slab_alloc_node mm/slub.c:2705 [inline] + slab_alloc mm/slub.c:2713 [inline] + kmem_cache_alloc+0x14c/0x388 mm/slub.c:2718 + kmem_cache_zalloc include/linux/slab.h:697 [inline] + net_alloc net/core/net_namespace.c:384 [inline] + copy_net_ns+0xc4/0x2d0 net/core/net_namespace.c:424 + create_new_namespaces+0x300/0x658 kernel/nsproxy.c:107 + unshare_nsproxy_namespaces+0xa0/0x198 kernel/nsproxy.c:206 + ksys_unshare+0x340/0x628 kernel/fork.c:2577 + __do_sys_unshare kernel/fork.c:2645 [inline] + __se_sys_unshare kernel/fork.c:2643 [inline] + __arm64_sys_unshare+0x38/0x58 kernel/fork.c:2643 + __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] + invoke_syscall arch/arm64/kernel/syscall.c:47 [inline] + el0_svc_common+0x168/0x390 arch/arm64/kernel/syscall.c:83 + el0_svc_handler+0x60/0xd0 
arch/arm64/kernel/syscall.c:129 + el0_svc+0x8/0xc arch/arm64/kernel/entry.S:960 + +Freed by task 264: + save_stack mm/kasan/kasan.c:448 [inline] + set_track mm/kasan/kasan.c:460 [inline] + __kasan_slab_free+0x114/0x220 mm/kasan/kasan.c:521 + kasan_slab_free+0x10/0x18 mm/kasan/kasan.c:528 + slab_free_hook mm/slub.c:1370 [inline] + slab_free_freelist_hook mm/slub.c:1397 [inline] + slab_free mm/slub.c:2952 [inline] + kmem_cache_free+0xb8/0x3a8 mm/slub.c:2968 + net_free net/core/net_namespace.c:400 [inline] + net_drop_ns.part.6+0x78/0x90 net/core/net_namespace.c:407 + net_drop_ns net/core/net_namespace.c:406 [inline] + cleanup_net+0x53c/0x6d8 net/core/net_namespace.c:569 + process_one_work+0x6e8/0x1700 kernel/workqueue.c:2153 + worker_thread+0x3b0/0xdd0 kernel/workqueue.c:2296 + kthread+0x2f0/0x378 kernel/kthread.c:255 + ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:1117 + +The buggy address belongs to the object at ffff8003496a3f80 + which belongs to the cache net_namespace of size 7872 +The buggy address is located 1796 bytes inside of + 7872-byte region [ffff8003496a3f80, ffff8003496a5e40) +The buggy address belongs to the page: +page:ffff7e000d25a800 count:1 mapcount:0 mapping:ffff80036ce4b000 +index:0x0 compound_mapcount: 0 +flags: 0xffffe0000008100(slab|head) +raw: 0ffffe0000008100 dead000000000100 dead000000000200 ffff80036ce4b000 +raw: 0000000000000000 0000000080040004 00000001ffffffff 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8003496a4580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8003496a4600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +>ffff8003496a4680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff8003496a4700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8003496a4780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +================================================================== + +Fixes: 467fa15356ac("RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.") +Reported-by: Hulk Robot +Signed-off-by: Mao Wenan +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/rds/tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/rds/tcp.c b/net/rds/tcp.c +index c16f0a362c32..a729c47db781 100644 +--- a/net/rds/tcp.c ++++ b/net/rds/tcp.c +@@ -600,7 +600,7 @@ static void rds_tcp_kill_sock(struct net *net) + list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); + +- if (net != c_net || !tc->t_sock) ++ if (net != c_net) + continue; + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { + list_move_tail(&tc->t_tcp_node, &tmp_list); +-- +2.19.1 + diff --git a/queue-5.0/net-sched-act_sample-fix-divide-by-zero-in-the-traff.patch b/queue-5.0/net-sched-act_sample-fix-divide-by-zero-in-the-traff.patch new file mode 100644 index 0000000000..0f01b7bac5 --- /dev/null +++ b/queue-5.0/net-sched-act_sample-fix-divide-by-zero-in-the-traff.patch @@ -0,0 +1,104 @@ +From 22ab96f63513f1db8339e5b35aafb857da482db0 Mon Sep 17 00:00:00 2001 +From: Davide Caratti +Date: Thu, 4 Apr 2019 12:31:35 +0200 +Subject: net/sched: act_sample: fix divide by zero in the traffic path + +[ Upstream commit fae2708174ae95d98d19f194e03d6e8f688ae195 ] + +the control path of 'sample' action does not validate the value of 'rate' +provided by the user, but then it uses it as divisor in the traffic path. 
+Validate it in tcf_sample_init(), and return -EINVAL with a proper extack +message in case that value is zero, to fix a splat with the script below: + + # tc f a dev test0 egress matchall action sample rate 0 group 1 index 2 + # tc -s a s action sample + total acts 1 + + action order 0: sample rate 1/0 group 1 pipe + index 2 ref 1 bind 1 installed 19 sec used 19 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + # ping 192.0.2.1 -I test0 -c1 -q + + divide error: 0000 [#1] SMP PTI + CPU: 1 PID: 6192 Comm: ping Not tainted 5.1.0-rc2.diag2+ #591 + Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 + RIP: 0010:tcf_sample_act+0x9e/0x1e0 [act_sample] + Code: 6a f1 85 c0 74 0d 80 3d 83 1a 00 00 00 0f 84 9c 00 00 00 4d 85 e4 0f 84 85 00 00 00 e8 9b d7 9c f1 44 8b 8b e0 00 00 00 31 d2 <41> f7 f1 85 d2 75 70 f6 85 83 00 00 00 10 48 8b 45 10 8b 88 08 01 + RSP: 0018:ffffae320190ba30 EFLAGS: 00010246 + RAX: 00000000b0677d21 RBX: ffff8af1ed9ec000 RCX: 0000000059a9fe49 + RDX: 0000000000000000 RSI: 000000000c7e33b7 RDI: ffff8af23daa0af0 + RBP: ffff8af1ee11b200 R08: 0000000074fcaf7e R09: 0000000000000000 + R10: 0000000000000050 R11: ffffffffb3088680 R12: ffff8af232307f80 + R13: 0000000000000003 R14: ffff8af1ed9ec000 R15: 0000000000000000 + FS: 00007fe9c6d2f740(0000) GS:ffff8af23da80000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fff6772f000 CR3: 00000000746a2004 CR4: 00000000001606e0 + Call Trace: + tcf_action_exec+0x7c/0x1c0 + tcf_classify+0x57/0x160 + __dev_queue_xmit+0x3dc/0xd10 + ip_finish_output2+0x257/0x6d0 + ip_output+0x75/0x280 + ip_send_skb+0x15/0x40 + raw_sendmsg+0xae3/0x1410 + sock_sendmsg+0x36/0x40 + __sys_sendto+0x10e/0x140 + __x64_sys_sendto+0x24/0x30 + do_syscall_64+0x60/0x210 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + [...] + Kernel panic - not syncing: Fatal exception in interrupt + +Add a TDC selftest to document that 'rate' is now being validated. + +Reported-by: Matteo Croce +Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action") +Signed-off-by: Davide Caratti +Acked-by: Yotam Gigi +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/sched/act_sample.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c +index 1a0c682fd734..fd62fe6c8e73 100644 +--- a/net/sched/act_sample.c ++++ b/net/sched/act_sample.c +@@ -43,8 +43,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, + struct tc_action_net *tn = net_generic(net, sample_net_id); + struct nlattr *tb[TCA_SAMPLE_MAX + 1]; + struct psample_group *psample_group; ++ u32 psample_group_num, rate; + struct tc_sample *parm; +- u32 psample_group_num; + struct tcf_sample *s; + bool exists = false; + int ret, err; +@@ -80,6 +80,12 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, + return -EEXIST; + } + ++ rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); ++ if (!rate) { ++ NL_SET_ERR_MSG(extack, "invalid sample rate"); ++ tcf_idr_release(*a, bind); ++ return -EINVAL; ++ } + psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); + psample_group = psample_group_get(net, psample_group_num); + if (!psample_group) { +@@ -91,7 +97,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, + + spin_lock_bh(&s->tcf_lock); + s->tcf_action = parm->action; +- s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); ++ s->rate = rate; + s->psample_group_num = psample_group_num; + RCU_INIT_POINTER(s->psample_group, psample_group); + +-- +2.19.1 + diff --git a/queue-5.0/net-sched-fix-get-helper-of-the-matchall-cls.patch b/queue-5.0/net-sched-fix-get-helper-of-the-matchall-cls.patch new file mode 100644 index 0000000000..80b50a49ba --- /dev/null +++ b/queue-5.0/net-sched-fix-get-helper-of-the-matchall-cls.patch @@ -0,0 +1,57 @@ +From 96faa0dcf0ceabee22f54482ba86fcda3120407c Mon Sep 17 00:00:00 2001 +From: Nicolas Dichtel +Date: Thu, 28 Mar 2019 10:35:06 +0100 +Subject: net/sched: fix ->get helper of the matchall cls + +[ Upstream commit 0db6f8befc32c68bb13d7ffbb2e563c79e913e13 ] + +It returned always NULL, thus it was never possible to get the filter. + +Example: +$ ip link add foo type dummy +$ ip link add bar type dummy +$ tc qdisc add dev foo clsact +$ tc filter add dev foo protocol all pref 1 ingress handle 1234 \ + matchall action mirred ingress mirror dev bar + +Before the patch: +$ tc filter get dev foo protocol all pref 1 ingress handle 1234 matchall +Error: Specified filter handle not found. +We have an error talking to the kernel + +After: +$ tc filter get dev foo protocol all pref 1 ingress handle 1234 matchall +filter ingress protocol all pref 1 matchall chain 0 handle 0x4d2 + not_in_hw + action order 1: mirred (Ingress Mirror to device bar) pipe + index 1 ref 1 bind 1 + +CC: Yotam Gigi +CC: Jiri Pirko +Fixes: fd62d9f5c575 ("net/sched: matchall: Fix configuration race") +Signed-off-by: Nicolas Dichtel +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/sched/cls_matchall.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c +index 0e408ee9dcec..5ba07cd11e31 100644 +--- a/net/sched/cls_matchall.c ++++ b/net/sched/cls_matchall.c +@@ -125,6 +125,11 @@ static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) + + static void *mall_get(struct tcf_proto *tp, u32 handle) + { ++ struct cls_mall_head *head = rtnl_dereference(tp->root); ++ ++ if (head && head->handle == handle) ++ return head; ++ + return NULL; + } + +-- +2.19.1 + diff --git a/queue-5.0/net-thunderx-fix-null-pointer-dereference-in-nicvf_o.patch b/queue-5.0/net-thunderx-fix-null-pointer-dereference-in-nicvf_o.patch new file mode 100644 index 0000000000..1ed101d2ec --- /dev/null +++ b/queue-5.0/net-thunderx-fix-null-pointer-dereference-in-nicvf_o.patch @@ -0,0 +1,136 @@ +From f1c8daba109e2244a780a2a0c1ca23780ef7616c Mon Sep 17 00:00:00 2001 +From: Lorenzo Bianconi +Date: Thu, 4 Apr 2019 12:16:27 +0200 +Subject: net: thunderx: fix NULL pointer dereference in nicvf_open/nicvf_stop + +[ Upstream commit 2ec1ed2aa68782b342458681aa4d16b65c9014d6 ] + +When a bpf program is uploaded, the driver computes the number of +xdp tx queues resulting in the allocation of additional qsets. +Starting from commit '2ecbe4f4a027 ("net: thunderx: replace global +nicvf_rx_mode_wq work queue for all VFs to private for each of them")' +the driver runs link state polling for each VF resulting in the +following NULL pointer dereference: + +[ 56.169256] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 +[ 56.178032] Mem abort info: +[ 56.180834] ESR = 0x96000005 +[ 56.183877] Exception class = DABT (current EL), IL = 32 bits +[ 56.189792] SET = 0, FnV = 0 +[ 56.192834] EA = 0, S1PTW = 0 +[ 56.195963] Data abort info: +[ 56.198831] ISV = 0, ISS = 0x00000005 +[ 56.202662] CM = 0, WnR = 0 +[ 56.205619] user pgtable: 64k pages, 48-bit VAs, pgdp = 0000000021f0c7a0 +[ 56.212315] [0000000000000020] pgd=0000000000000000, pud=0000000000000000 +[ 56.219094] Internal error: Oops: 96000005 [#1] SMP +[ 56.260459] CPU: 39 PID: 2034 Comm: ip Not tainted 5.1.0-rc3+ #3 +[ 56.266452] Hardware name: GIGABYTE R120-T33/MT30-GS1, BIOS T49 02/02/2018 +[ 56.273315] pstate: 80000005 (Nzcv daif -PAN -UAO) +[ 56.278098] pc : __ll_sc___cmpxchg_case_acq_64+0x4/0x20 +[ 56.283312] lr : mutex_lock+0x2c/0x50 +[ 56.286962] sp : ffff0000219af1b0 +[ 56.290264] x29: ffff0000219af1b0 x28: ffff800f64de49a0 +[ 56.295565] x27: 0000000000000000 x26: 0000000000000015 +[ 56.300865] x25: 0000000000000000 x24: 0000000000000000 +[ 56.306165] x23: 0000000000000000 x22: ffff000011117000 +[ 56.311465] x21: ffff800f64dfc080 x20: 0000000000000020 +[ 56.316766] x19: 0000000000000020 x18: 0000000000000001 +[ 56.322066] x17: 0000000000000000 x16: ffff800f2e077080 +[ 56.327367] x15: 0000000000000004 x14: 0000000000000000 +[ 56.332667] x13: ffff000010964438 x12: 0000000000000002 +[ 56.337967] x11: 0000000000000000 x10: 0000000000000c70 +[ 56.343268] x9 : ffff0000219af120 x8 : ffff800f2e077d50 +[ 56.348568] x7 : 0000000000000027 x6 : 000000062a9d6a84 +[ 56.353869] x5 : 0000000000000000 x4 : ffff800f2e077480 +[ 56.359169] x3 : 0000000000000008 x2 : ffff800f2e077080 +[ 56.364469] x1 : 0000000000000000 x0 : 0000000000000020 +[ 56.369770] Process ip (pid: 2034, stack limit = 0x00000000c862da3a) +[ 56.376110] Call trace: +[ 56.378546] __ll_sc___cmpxchg_case_acq_64+0x4/0x20 +[ 56.383414] drain_workqueue+0x34/0x198 
+[ 56.387247] nicvf_open+0x48/0x9e8 [nicvf] +[ 56.391334] nicvf_open+0x898/0x9e8 [nicvf] +[ 56.395507] nicvf_xdp+0x1bc/0x238 [nicvf] +[ 56.399595] dev_xdp_install+0x68/0x90 +[ 56.403333] dev_change_xdp_fd+0xc8/0x240 +[ 56.407333] do_setlink+0x8e0/0xbe8 +[ 56.410810] __rtnl_newlink+0x5b8/0x6d8 +[ 56.414634] rtnl_newlink+0x54/0x80 +[ 56.418112] rtnetlink_rcv_msg+0x22c/0x2f8 +[ 56.422199] netlink_rcv_skb+0x60/0x120 +[ 56.426023] rtnetlink_rcv+0x28/0x38 +[ 56.429587] netlink_unicast+0x1c8/0x258 +[ 56.433498] netlink_sendmsg+0x1b4/0x350 +[ 56.437410] sock_sendmsg+0x4c/0x68 +[ 56.440887] ___sys_sendmsg+0x240/0x280 +[ 56.444711] __sys_sendmsg+0x68/0xb0 +[ 56.448275] __arm64_sys_sendmsg+0x2c/0x38 +[ 56.452361] el0_svc_handler+0x9c/0x128 +[ 56.456186] el0_svc+0x8/0xc +[ 56.459056] Code: 35ffff91 2a1003e0 d65f03c0 f9800011 (c85ffc10) +[ 56.465166] ---[ end trace 4a57fdc27b0a572c ]--- +[ 56.469772] Kernel panic - not syncing: Fatal exception + +Fix it by checking nicvf_rx_mode_wq pointer in nicvf_open and nicvf_stop + +Fixes: 2ecbe4f4a027 ("net: thunderx: replace global nicvf_rx_mode_wq work queue for all VFs to private for each of them") +Fixes: 2c632ad8bc74 ("net: thunderx: move link state polling function to VF") +Reported-by: Matteo Croce +Signed-off-by: Lorenzo Bianconi +Tested-by: Matteo Croce +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../net/ethernet/cavium/thunder/nicvf_main.c | 20 +++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +index 503cfadff4ac..d4ee9f9c8c34 100644 +--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c ++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +@@ -1328,10 +1328,11 @@ int nicvf_stop(struct net_device *netdev) + struct nicvf_cq_poll *cq_poll = NULL; + union nic_mbx mbx = {}; + +- cancel_delayed_work_sync(&nic->link_change_work); +- + /* wait till all queued set_rx_mode tasks completes */ +- drain_workqueue(nic->nicvf_rx_mode_wq); ++ if (nic->nicvf_rx_mode_wq) { ++ cancel_delayed_work_sync(&nic->link_change_work); ++ drain_workqueue(nic->nicvf_rx_mode_wq); ++ } + + mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; + nicvf_send_msg_to_pf(nic, &mbx); +@@ -1452,7 +1453,8 @@ int nicvf_open(struct net_device *netdev) + struct nicvf_cq_poll *cq_poll = NULL; + + /* wait till all queued set_rx_mode tasks completes if any */ +- drain_workqueue(nic->nicvf_rx_mode_wq); ++ if (nic->nicvf_rx_mode_wq) ++ drain_workqueue(nic->nicvf_rx_mode_wq); + + netif_carrier_off(netdev); + +@@ -1550,10 +1552,12 @@ int nicvf_open(struct net_device *netdev) + /* Send VF config done msg to PF */ + nicvf_send_cfg_done(nic); + +- INIT_DELAYED_WORK(&nic->link_change_work, +- nicvf_link_status_check_task); +- queue_delayed_work(nic->nicvf_rx_mode_wq, +- &nic->link_change_work, 0); ++ if (nic->nicvf_rx_mode_wq) { ++ INIT_DELAYED_WORK(&nic->link_change_work, ++ nicvf_link_status_check_task); ++ queue_delayed_work(nic->nicvf_rx_mode_wq, ++ &nic->link_change_work, 0); ++ } + + return 0; + cleanup: +-- +2.19.1 + diff --git a/queue-5.0/net-vrf-fix-ping-failed-when-vrf-mtu-is-set-to-0.patch b/queue-5.0/net-vrf-fix-ping-failed-when-vrf-mtu-is-set-to-0.patch new file mode 100644 index 0000000000..3337998fc4 --- /dev/null +++ b/queue-5.0/net-vrf-fix-ping-failed-when-vrf-mtu-is-set-to-0.patch @@ -0,0 +1,63 @@ +From f46010b917db41a3475f25295dde1bccc24d8ffa Mon Sep 17 00:00:00 2001 +From: Miaohe Lin +Date: Mon, 8 Apr 2019 10:04:20 +0800 +Subject: net: vrf: 
Fix ping failed when vrf mtu is set to 0 + +[ Upstream commit 5055376a3b44c4021de8830c9157f086a97731df ] + +When the mtu of a vrf device is set to 0, it would cause ping +failed. So I think we should limit vrf mtu in a reasonable range +to solve this problem. I set dev->min_mtu to IPV6_MIN_MTU, so it +will works for both ipv4 and ipv6. And if dev->max_mtu still be 0 +can be confusing, so I set dev->max_mtu to ETH_MAX_MTU. + +Here is the reproduce step: + +1.Config vrf interface and set mtu to 0: +3: enp4s0: mtu 1500 qdisc fq_codel +master vrf1 state UP mode DEFAULT group default qlen 1000 + link/ether 52:54:00:9e:dd:c1 brd ff:ff:ff:ff:ff:ff + +2.Ping peer: +3: enp4s0: mtu 1500 qdisc fq_codel +master vrf1 state UP group default qlen 1000 + link/ether 52:54:00:9e:dd:c1 brd ff:ff:ff:ff:ff:ff + inet 10.0.0.1/16 scope global enp4s0 + valid_lft forever preferred_lft forever +connect: Network is unreachable + +3.Set mtu to default value, ping works: +PING 10.0.0.2 (10.0.0.2) 56(84) bytes of data. +64 bytes from 10.0.0.2: icmp_seq=1 ttl=64 time=1.88 ms + +Fixes: ad49bc6361ca2 ("net: vrf: remove MTU limits for vrf device") +Signed-off-by: Miaohe Lin +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/vrf.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c +index 6d1a1abbed27..cd15c32b2e43 100644 +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -1275,8 +1275,12 @@ static void vrf_setup(struct net_device *dev) + dev->priv_flags |= IFF_NO_QUEUE; + dev->priv_flags |= IFF_NO_RX_HANDLER; + +- dev->min_mtu = 0; +- dev->max_mtu = 0; ++ /* VRF devices do not care about MTU, but if the MTU is set ++ * too low then the ipv4 and ipv6 protocols are disabled ++ * which breaks networking. ++ */ ++ dev->min_mtu = IPV6_MIN_MTU; ++ dev->max_mtu = ETH_MAX_MTU; + } + + static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], +-- +2.19.1 + diff --git a/queue-5.0/netns-provide-pure-entropy-for-net_hash_mix.patch b/queue-5.0/netns-provide-pure-entropy-for-net_hash_mix.patch new file mode 100644 index 0000000000..27f3f009ae --- /dev/null +++ b/queue-5.0/netns-provide-pure-entropy-for-net_hash_mix.patch @@ -0,0 +1,81 @@ +From b6f9837c1843d6dc02ae03942ad6fd2d9b8df2ff Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 27 Mar 2019 08:21:30 -0700 +Subject: netns: provide pure entropy for net_hash_mix() + +[ Upstream commit 355b98553789b646ed97ad801a619ff898471b92 ] + +net_hash_mix() currently uses kernel address of a struct net, +and is used in many places that could be used to reveal this +address to a patient attacker, thus defeating KASLR, for +the typical case (initial net namespace, &init_net is +not dynamically allocated) + +I believe the original implementation tried to avoid spending +too many cycles in this function, but security comes first. + +Also provide entropy regardless of CONFIG_NET_NS. + +Fixes: 0b4419162aa6 ("netns: introduce the net_hash_mix "salt" for hashes") +Signed-off-by: Eric Dumazet +Reported-by: Amit Klein +Reported-by: Benny Pinkas +Cc: Pavel Emelyanov +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + include/net/net_namespace.h | 1 + + include/net/netns/hash.h | 10 ++-------- + net/core/net_namespace.c | 1 + + 3 files changed, 4 insertions(+), 8 deletions(-) + +diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h +index 99d4148e0f90..1c3126c14930 100644 +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -58,6 +58,7 @@ struct net { + */ + spinlock_t rules_mod_lock; + ++ u32 hash_mix; + atomic64_t cookie_gen; + + struct list_head list; /* list of network namespaces */ +diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h +index 16a842456189..d9b665151f3d 100644 +--- a/include/net/netns/hash.h ++++ b/include/net/netns/hash.h +@@ -2,16 +2,10 @@ + #ifndef __NET_NS_HASH_H__ + #define __NET_NS_HASH_H__ + +-#include +- +-struct net; ++#include + + static inline u32 net_hash_mix(const struct net *net) + { +-#ifdef CONFIG_NET_NS +- return (u32)(((unsigned long)net) >> ilog2(sizeof(*net))); +-#else +- return 0; +-#endif ++ return net->hash_mix; + } + #endif +diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c +index b02fb19df2cc..40c249c574c1 100644 +--- a/net/core/net_namespace.c ++++ b/net/core/net_namespace.c +@@ -304,6 +304,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) + + refcount_set(&net->count, 1); + refcount_set(&net->passive, 1); ++ get_random_bytes(&net->hash_mix, sizeof(u32)); + net->dev_base_seq = 1; + net->user_ns = user_ns; + idr_init(&net->netns_ids); +-- +2.19.1 + diff --git a/queue-5.0/nfp-disable-netpoll-on-representors.patch b/queue-5.0/nfp-disable-netpoll-on-representors.patch new file mode 100644 index 0000000000..5088411898 --- /dev/null +++ b/queue-5.0/nfp-disable-netpoll-on-representors.patch @@ -0,0 +1,46 @@ +From 53b0051cfdc1fbd78800f414c4d9b762e1432ca3 Mon Sep 17 00:00:00 2001 +From: Jakub Kicinski +Date: Wed, 27 Mar 2019 11:38:39 -0700 +Subject: nfp: disable netpoll on representors + +[ Upstream commit c3e1f7fff69c78169c8ac40cc74ac4307f74e36d ] + +NFP reprs are software device on top of the PF's vNIC. +The comment above __dev_queue_xmit() sayeth: + + When calling this method, interrupts MUST be enabled. This is because + the BH enable code must have IRQs enabled so that it will not deadlock. + +For netconsole we can't guarantee IRQ state, let's just +disable netpoll on representors to be on the safe side. + +When the initial implementation of NFP reprs was added by the +commit 5de73ee46704 ("nfp: general representor implementation") +.ndo_poll_controller was required for netpoll to be enabled. + +Fixes: ac3d9dd034e5 ("netpoll: make ndo_poll_controller() optional") +Signed-off-by: Jakub Kicinski +Reviewed-by: John Hurley +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +index 7d62e3698f08..73db94e55fd0 100644 +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +@@ -384,7 +384,7 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, + netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); + netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS; + +- netdev->priv_flags |= IFF_NO_QUEUE; ++ netdev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; + netdev->features |= NETIF_F_LLTX; + + if (nfp_app_has_tc(app)) { +-- +2.19.1 + diff --git a/queue-5.0/nfp-validate-the-return-code-from-dev_queue_xmit.patch b/queue-5.0/nfp-validate-the-return-code-from-dev_queue_xmit.patch new file mode 100644 index 0000000000..4ebdd8d85b --- /dev/null +++ b/queue-5.0/nfp-validate-the-return-code-from-dev_queue_xmit.patch @@ -0,0 +1,37 @@ +From e0b1d83bba66914de74ca25b415d284ad561b0ef Mon Sep 17 00:00:00 2001 +From: Jakub Kicinski +Date: Wed, 27 Mar 2019 11:38:38 -0700 +Subject: nfp: validate the return code from dev_queue_xmit() + +[ Upstream commit c8ba5b91a04e3e2643e48501c114108802f21cda ] + +dev_queue_xmit() may return error codes as well as netdev_tx_t, +and it always consumes the skb. Make sure we always return a +correct netdev_tx_t value. + +Fixes: eadfa4c3be99 ("nfp: add stats and xmit helpers for representors") +Signed-off-by: Jakub Kicinski +Reviewed-by: John Hurley +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +index 69d7aebda09b..7d62e3698f08 100644 +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +@@ -196,7 +196,7 @@ static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) + ret = dev_queue_xmit(skb); + nfp_repr_inc_tx_stats(netdev, len, ret); + +- return ret; ++ return NETDEV_TX_OK; + } + + static int nfp_repr_stop(struct net_device *netdev) +-- +2.19.1 + diff --git a/queue-5.0/openvswitch-fix-flow-actions-reallocation.patch b/queue-5.0/openvswitch-fix-flow-actions-reallocation.patch new file mode 100644 index 0000000000..24038afab4 --- /dev/null +++ b/queue-5.0/openvswitch-fix-flow-actions-reallocation.patch @@ -0,0 +1,68 @@ +From c219cea7d51ea1701e8173f8822fa303b425080d Mon Sep 17 00:00:00 2001 +From: Andrea Righi +Date: Thu, 28 Mar 2019 07:36:00 +0100 +Subject: openvswitch: fix flow actions reallocation + +[ Upstream commit f28cd2af22a0c134e4aa1c64a70f70d815d473fb ] + +The flow action buffer can be resized if it's not big enough to contain +all the requested flow actions. However, this resize doesn't take into +account the new requested size, the buffer is only increased by a factor +of 2x. 
This might be not enough to contain the new data, causing a +buffer overflow, for example: + +[ 42.044472] ============================================================================= +[ 42.045608] BUG kmalloc-96 (Not tainted): Redzone overwritten +[ 42.046415] ----------------------------------------------------------------------------- + +[ 42.047715] Disabling lock debugging due to kernel taint +[ 42.047716] INFO: 0x8bf2c4a5-0x720c0928. First byte 0x0 instead of 0xcc +[ 42.048677] INFO: Slab 0xbc6d2040 objects=29 used=18 fp=0xdc07dec4 flags=0x2808101 +[ 42.049743] INFO: Object 0xd53a3464 @offset=2528 fp=0xccdcdebb + +[ 42.050747] Redzone 76f1b237: cc cc cc cc cc cc cc cc ........ +[ 42.051839] Object d53a3464: 6b 6b 6b 6b 6b 6b 6b 6b 0c 00 00 00 6c 00 00 00 kkkkkkkk....l... +[ 42.053015] Object f49a30cc: 6c 00 0c 00 00 00 00 00 00 00 00 03 78 a3 15 f6 l...........x... +[ 42.054203] Object acfe4220: 20 00 02 00 ff ff ff ff 00 00 00 00 00 00 00 00 ............... +[ 42.055370] Object 21024e91: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +[ 42.056541] Object 070e04c3: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +[ 42.057797] Object 948a777a: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +[ 42.059061] Redzone 8bf2c4a5: 00 00 00 00 .... +[ 42.060189] Padding a681b46e: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ + +Fix by making sure the new buffer is properly resized to contain all the +requested data. + +BugLink: https://bugs.launchpad.net/bugs/1813244 +Signed-off-by: Andrea Righi +Acked-by: Pravin B Shelar +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/openvswitch/flow_netlink.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c +index 691da853bef5..4bdf5e3ac208 100644 +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -2306,14 +2306,14 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, + + struct sw_flow_actions *acts; + int new_acts_size; +- int req_size = NLA_ALIGN(attr_len); ++ size_t req_size = NLA_ALIGN(attr_len); + int next_offset = offsetof(struct sw_flow_actions, actions) + + (*sfa)->actions_len; + + if (req_size <= (ksize(*sfa) - next_offset)) + goto out; + +- new_acts_size = ksize(*sfa) * 2; ++ new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); + + if (new_acts_size > MAX_ACTIONS_BUFSIZE) { + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { +-- +2.19.1 + diff --git a/queue-5.0/qmi_wwan-add-olicard-600.patch b/queue-5.0/qmi_wwan-add-olicard-600.patch new file mode 100644 index 0000000000..c7f9f9ad79 --- /dev/null +++ b/queue-5.0/qmi_wwan-add-olicard-600.patch @@ -0,0 +1,65 @@ +From 4f1dfcc704dad66c30b333790df3eedbf7a48546 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= +Date: Wed, 27 Mar 2019 15:26:01 +0100 +Subject: qmi_wwan: add Olicard 600 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 6289d0facd9ebce4cc83e5da39e15643ee998dc5 ] + +This is a Qualcomm based device with a QMI function on interface 4. +It is mode switched from 2020:2030 using a standard eject message. 
+ +T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 +D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 +P: Vendor=2020 ProdID=2031 Rev= 2.32 +S: Manufacturer=Mobile Connect +S: Product=Mobile Connect +S: SerialNumber=0123456789ABCDEF +C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA +I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) +E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms +E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) +E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms +E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms +E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) +E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms +E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms +E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) +E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms +E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms +E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) +E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms +E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms +E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +I:* If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) +E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms +E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=125us + +Signed-off-by: Bjørn Mork +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c +index 74bebbdb4b15..9195f3476b1d 100644 +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -1203,6 +1203,7 @@ static const struct usb_device_id products[] = { + {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ + {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ + {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ ++ {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ + {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */ + {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ + {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ +-- +2.19.1 + diff --git a/queue-5.0/r8169-disable-aspm-again.patch b/queue-5.0/r8169-disable-aspm-again.patch new file mode 100644 index 0000000000..e7aafff9dc --- /dev/null +++ b/queue-5.0/r8169-disable-aspm-again.patch @@ -0,0 +1,53 @@ +From f98a71d44f4491e7bcfdfcf58ec7eea08618a800 Mon Sep 17 00:00:00 2001 +From: Heiner Kallweit +Date: Fri, 5 Apr 2019 20:46:46 +0200 +Subject: r8169: disable ASPM again + +[ Upstream commit b75bb8a5b755d0c7bf1ac071e4df2349a7644a1e ] + +There's a significant number of reports that re-enabling ASPM causes +different issues, ranging from decreased performance to system not +booting at all. This affects only a minority of users, but the number +of affected users is big enough that we better switch off ASPM again. + +This will hurt notebook users who are not affected by the issues, they +may see decreased battery runtime w/o ASPM. With the PCI core folks is +being discussed to add generic sysfs attributes to control ASPM. +Once this is in place brave enough users can re-enable ASPM on their +system. + +Fixes: a99790bf5c7f ("r8169: Reinstate ASPM Support") +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/realtek/r8169.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c +index f55d177ae894..5adb00f521db 100644 +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -7224,6 +7225,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) + return rc; + } + ++ /* Disable ASPM completely as that cause random device stop working ++ * problems as well as full system hangs for some PCIe devices users. ++ */ ++ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); ++ + /* enable device (incl. PCI PM wakeup and hotplug setup) */ + rc = pcim_enable_device(pdev); + if (rc < 0) { +-- +2.19.1 + diff --git a/queue-5.0/r8169-disable-default-rx-interrupt-coalescing-on-rtl.patch b/queue-5.0/r8169-disable-default-rx-interrupt-coalescing-on-rtl.patch new file mode 100644 index 0000000000..999e8cdf97 --- /dev/null +++ b/queue-5.0/r8169-disable-default-rx-interrupt-coalescing-on-rtl.patch @@ -0,0 +1,41 @@ +From d6cf7e8335a973f1338eb4949994c0c3ce6d9d34 Mon Sep 17 00:00:00 2001 +From: Heiner Kallweit +Date: Sat, 30 Mar 2019 17:13:24 +0100 +Subject: r8169: disable default rx interrupt coalescing on RTL8168 + +[ Upstream commit 288ac524cf70a8e7ed851a61ed2a9744039dae8d ] + +It was reported that re-introducing ASPM, in combination with RX +interrupt coalescing, results in significantly increased packet +latency, see [0]. Disabling ASPM or RX interrupt coalescing fixes +the issue. Therefore change the driver's default to disable RX +interrupt coalescing. Users still have the option to enable RX +coalescing via ethtool. + +[0] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=925496 + +Fixes: a99790bf5c7f ("r8169: Reinstate ASPM Support") +Reported-by: Mike Crowe +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/realtek/r8169.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c +index 5adb00f521db..365cddbfc684 100644 +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -5333,7 +5333,7 @@ static void rtl_hw_start_8168(struct rtl8169_private *tp) + tp->cp_cmd |= PktCntrDisable | INTT_1; + RTL_W16(tp, CPlusCmd, tp->cp_cmd); + +- RTL_W16(tp, IntrMitigate, 0x5151); ++ RTL_W16(tp, IntrMitigate, 0x5100); + + /* Work around for RxFIFO overflow. 
*/ + if (tp->mac_version == RTL_GIGA_MAC_VER_11) { +-- +2.19.1 + diff --git a/queue-5.0/sctp-initialize-_pad-of-sockaddr_in-before-copying-t.patch b/queue-5.0/sctp-initialize-_pad-of-sockaddr_in-before-copying-t.patch new file mode 100644 index 0000000000..decbc718fe --- /dev/null +++ b/queue-5.0/sctp-initialize-_pad-of-sockaddr_in-before-copying-t.patch @@ -0,0 +1,58 @@ +From 90c1e14735137658645c625b8598459c3ff58694 Mon Sep 17 00:00:00 2001 +From: Xin Long +Date: Sun, 31 Mar 2019 16:58:15 +0800 +Subject: sctp: initialize _pad of sockaddr_in before copying to user memory + +[ Upstream commit 09279e615c81ce55e04835970601ae286e3facbe ] + +Syzbot report a kernel-infoleak: + + BUG: KMSAN: kernel-infoleak in _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 + Call Trace: + _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 + copy_to_user include/linux/uaccess.h:174 [inline] + sctp_getsockopt_peer_addrs net/sctp/socket.c:5911 [inline] + sctp_getsockopt+0x1668e/0x17f70 net/sctp/socket.c:7562 + ... + Uninit was stored to memory at: + sctp_transport_init net/sctp/transport.c:61 [inline] + sctp_transport_new+0x16d/0x9a0 net/sctp/transport.c:115 + sctp_assoc_add_peer+0x532/0x1f70 net/sctp/associola.c:637 + sctp_process_param net/sctp/sm_make_chunk.c:2548 [inline] + sctp_process_init+0x1a1b/0x3ed0 net/sctp/sm_make_chunk.c:2361 + ... + Bytes 8-15 of 16 are uninitialized + +It was caused by that th _pad field (the 8-15 bytes) of a v4 addr (saved in +struct sockaddr_in) wasn't initialized, but directly copied to user memory +in sctp_getsockopt_peer_addrs(). + +So fix it by calling memset(addr->v4.sin_zero, 0, 8) to initialize _pad of +sockaddr_in before copying it to user memory in sctp_v4_addr_to_user(), as +sctp_v6_addr_to_user() does. + +Reported-by: syzbot+86b5c7c236a22616a72f@syzkaller.appspotmail.com +Signed-off-by: Xin Long +Tested-by: Alexander Potapenko +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/sctp/protocol.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c +index 6abc8b274270..951afdeea5e9 100644 +--- a/net/sctp/protocol.c ++++ b/net/sctp/protocol.c +@@ -600,6 +600,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, + static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) + { + /* No address mapping for V4 sockets */ ++ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); + return sizeof(struct sockaddr_in); + } + +-- +2.19.1 + diff --git a/queue-5.0/series b/queue-5.0/series index 83dc69e051..053cbd4dac 100644 --- a/queue-5.0/series +++ b/queue-5.0/series @@ -3,3 +3,39 @@ kbuild-pkg-use-f-srctree-makefile-to-recurse-to-top-.patch netfilter-nft_compat-use-.release_ops-and-remove-lis.patch netfilter-nf_tables-use-after-free-in-dynamic-operat.patch netfilter-nf_tables-add-missing-release_ops-in-error.patch +hv_netvsc-fix-unwanted-wakeup-after-tx_disable.patch +ibmvnic-fix-completion-structure-initialization.patch +ip6_tunnel-match-to-arphrd_tunnel6-for-dev-type.patch +ipv6-fix-dangling-pointer-when-ipv6-fragment.patch +ipv6-sit-reset-ip-header-pointer-in-ipip6_rcv.patch +kcm-switch-order-of-device-registration-to-fix-a-cra.patch +net-ethtool-not-call-vzalloc-for-zero-sized-memory-r.patch +net-gro-fix-gro-flush-when-receiving-a-gso-packet.patch +net-mlx5-decrease-default-mr-cache-size.patch +netns-provide-pure-entropy-for-net_hash_mix.patch +net-rds-force-to-destroy-connection-if-t_sock-is-nul.patch +net-sched-act_sample-fix-divide-by-zero-in-the-traff.patch +net-sched-fix-get-helper-of-the-matchall-cls.patch +openvswitch-fix-flow-actions-reallocation.patch +qmi_wwan-add-olicard-600.patch +r8169-disable-aspm-again.patch +sctp-initialize-_pad-of-sockaddr_in-before-copying-t.patch +tcp-ensure-dctcp-reacts-to-losses.patch +tcp-fix-a-potential-null-pointer-dereference-in-tcp_.patch +vrf-check-accept_source_route-on-the-original-netdev.patch +net-mlx5e-fix-error-handling-when-refreshing-tirs.patch +net-mlx5e-add-a-lock-on-tir-list.patch +nfp-validate-the-return-code-from-dev_queue_xmit.patch +nfp-disable-netpoll-on-representors.patch +bnxt_en-improve-rx-consumer-index-validity-check.patch +bnxt_en-reset-device-on-rx-buffer-errors.patch +net-ip_gre-fix-possible-use-after-free-in-erspan_rcv.patch +net-ip6_gre-fix-possible-use-after-free-in-ip6erspan.patch +net-bridge-always-clear-mcast-matching-struct-on-rep.patch +net-thunderx-fix-null-pointer-dereference-in-nicvf_o.patch +net-vrf-fix-ping-failed-when-vrf-mtu-is-set-to-0.patch +net-core-netif_receive_skb_list-unlist-skb-before-pa.patch +r8169-disable-default-rx-interrupt-coalescing-on-rtl.patch +net-mlx5-add-a-missing-check-on-idr_find-free-buf.patch +net-mlx5e-update-xoff-formula.patch +net-mlx5e-update-xon-formula.patch diff --git a/queue-5.0/tcp-ensure-dctcp-reacts-to-losses.patch b/queue-5.0/tcp-ensure-dctcp-reacts-to-losses.patch new file mode 100644 index 0000000000..eea9759c12 --- /dev/null +++ b/queue-5.0/tcp-ensure-dctcp-reacts-to-losses.patch @@ -0,0 +1,145 @@ +From 1af50b31a46f163a9e1fc8ae15041331953b9d1a Mon Sep 17 00:00:00 2001 +From: Koen De Schepper +Date: Thu, 4 Apr 2019 12:24:02 +0000 +Subject: tcp: Ensure DCTCP reacts to losses +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit aecfde23108b8e637d9f5c5e523b24fb97035dc3 ] + +RFC8257 §3.5 explicitly states that "A DCTCP sender MUST react to +loss episodes in the same way as 
conventional TCP". + +Currently, Linux DCTCP performs no cwnd reduction when losses +are encountered. Optionally, the dctcp_clamp_alpha_on_loss resets +alpha to its maximal value if a RTO happens. This behavior +is sub-optimal for at least two reasons: i) it ignores losses +triggering fast retransmissions; and ii) it causes unnecessary large +cwnd reduction in the future if the loss was isolated as it resets +the historical term of DCTCP's alpha EWMA to its maximal value (i.e., +denoting a total congestion). The second reason has an especially +noticeable effect when using DCTCP in high BDP environments, where +alpha normally stays at low values. + +This patch replace the clamping of alpha by setting ssthresh to +half of cwnd for both fast retransmissions and RTOs, at most once +per RTT. Consequently, the dctcp_clamp_alpha_on_loss module parameter +has been removed. + +The table below shows experimental results where we measured the +drop probability of a PIE AQM (not applying ECN marks) at a +bottleneck in the presence of a single TCP flow with either the +alpha-clamping option enabled or the cwnd halving proposed by this +patch. Results using reno or cubic are given for comparison. + + | Link | RTT | Drop + TCP CC | speed | base+AQM | probability + ==================|=========|==========|============ + CUBIC | 40Mbps | 7+20ms | 0.21% + RENO | | | 0.19% + DCTCP-CLAMP-ALPHA | | | 25.80% + DCTCP-HALVE-CWND | | | 0.22% + ------------------|---------|----------|------------ + CUBIC | 100Mbps | 7+20ms | 0.03% + RENO | | | 0.02% + DCTCP-CLAMP-ALPHA | | | 23.30% + DCTCP-HALVE-CWND | | | 0.04% + ------------------|---------|----------|------------ + CUBIC | 800Mbps | 1+1ms | 0.04% + RENO | | | 0.05% + DCTCP-CLAMP-ALPHA | | | 18.70% + DCTCP-HALVE-CWND | | | 0.06% + +We see that, without halving its cwnd for all source of losses, +DCTCP drives the AQM to large drop probabilities in order to keep +the queue length under control (i.e., it repeatedly faces RTOs). +Instead, if DCTCP reacts to all source of losses, it can then be +controlled by the AQM using similar drop levels than cubic or reno. + +Signed-off-by: Koen De Schepper +Signed-off-by: Olivier Tilmans +Cc: Bob Briscoe +Cc: Lawrence Brakmo +Cc: Florian Westphal +Cc: Daniel Borkmann +Cc: Yuchung Cheng +Cc: Neal Cardwell +Cc: Eric Dumazet +Cc: Andrew Shewmaker +Cc: Glenn Judd +Acked-by: Florian Westphal +Acked-by: Neal Cardwell +Acked-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_dctcp.c | 36 ++++++++++++++++++------------------ + 1 file changed, 18 insertions(+), 18 deletions(-) + +diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c +index cd4814f7e962..359da68d7c06 100644 +--- a/net/ipv4/tcp_dctcp.c ++++ b/net/ipv4/tcp_dctcp.c +@@ -67,11 +67,6 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; + module_param(dctcp_alpha_on_init, uint, 0644); + MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value"); + +-static unsigned int dctcp_clamp_alpha_on_loss __read_mostly; +-module_param(dctcp_clamp_alpha_on_loss, uint, 0644); +-MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss, +- "parameter for clamping alpha on loss"); +- + static struct tcp_congestion_ops dctcp_reno; + + static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) +@@ -164,21 +159,23 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) + } + } + +-static void dctcp_state(struct sock *sk, u8 new_state) ++static void dctcp_react_to_loss(struct sock *sk) + { +- if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) { +- struct dctcp *ca = inet_csk_ca(sk); ++ struct dctcp *ca = inet_csk_ca(sk); ++ struct tcp_sock *tp = tcp_sk(sk); + +- /* If this extension is enabled, we clamp dctcp_alpha to +- * max on packet loss; the motivation is that dctcp_alpha +- * is an indicator to the extend of congestion and packet +- * loss is an indicator of extreme congestion; setting +- * this in practice turned out to be beneficial, and +- * effectively assumes total congestion which reduces the +- * window by half. +- */ +- ca->dctcp_alpha = DCTCP_MAX_ALPHA; +- } ++ ca->loss_cwnd = tp->snd_cwnd; ++ tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); ++} ++ ++static void dctcp_state(struct sock *sk, u8 new_state) ++{ ++ if (new_state == TCP_CA_Recovery && ++ new_state != inet_csk(sk)->icsk_ca_state) ++ dctcp_react_to_loss(sk); ++ /* We handle RTO in dctcp_cwnd_event to ensure that we perform only ++ * one loss-adjustment per RTT. ++ */ + } + + static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) +@@ -190,6 +187,9 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) + case CA_EVENT_ECN_NO_CE: + dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); + break; ++ case CA_EVENT_LOSS: ++ dctcp_react_to_loss(sk); ++ break; + default: + /* Don't care for the rest. */ + break; +-- +2.19.1 + diff --git a/queue-5.0/tcp-fix-a-potential-null-pointer-dereference-in-tcp_.patch b/queue-5.0/tcp-fix-a-potential-null-pointer-dereference-in-tcp_.patch new file mode 100644 index 0000000000..1fcce27ce6 --- /dev/null +++ b/queue-5.0/tcp-fix-a-potential-null-pointer-dereference-in-tcp_.patch @@ -0,0 +1,41 @@ +From 09a44f6ff0c65aa9b78ec861d31f679e700f1abe Mon Sep 17 00:00:00 2001 +From: Dust Li +Date: Mon, 1 Apr 2019 16:04:53 +0800 +Subject: tcp: fix a potential NULL pointer dereference in tcp_sk_exit + +[ Upstream commit b506bc975f60f06e13e74adb35e708a23dc4e87c ] + + When tcp_sk_init() failed in inet_ctl_sock_create(), + 'net->ipv4.tcp_congestion_control' will be left + uninitialized, but tcp_sk_exit() hasn't check for + that. + + This patch add checking on 'net->ipv4.tcp_congestion_control' + in tcp_sk_exit() to prevent NULL-ptr dereference. + +Fixes: 6670e1524477 ("tcp: Namespace-ify sysctl_tcp_default_congestion_control") +Signed-off-by: Dust Li +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_ipv4.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index 1aae9ab57fe9..00852f47a73d 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -2578,7 +2578,8 @@ static void __net_exit tcp_sk_exit(struct net *net) + { + int cpu; + +- module_put(net->ipv4.tcp_congestion_control->owner); ++ if (net->ipv4.tcp_congestion_control) ++ module_put(net->ipv4.tcp_congestion_control->owner); + + for_each_possible_cpu(cpu) + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); +-- +2.19.1 + diff --git a/queue-5.0/vrf-check-accept_source_route-on-the-original-netdev.patch b/queue-5.0/vrf-check-accept_source_route-on-the-original-netdev.patch new file mode 100644 index 0000000000..1d4b6a7f41 --- /dev/null +++ b/queue-5.0/vrf-check-accept_source_route-on-the-original-netdev.patch @@ -0,0 +1,98 @@ +From 5cde3719ed617f010b2860e7dc9b78b27182f378 Mon Sep 17 00:00:00 2001 +From: Stephen Suryaputra +Date: Mon, 1 Apr 2019 09:17:32 -0400 +Subject: vrf: check accept_source_route on the original netdevice + +[ Upstream commit 8c83f2df9c6578ea4c5b940d8238ad8a41b87e9e ] + +Configuration check to accept source route IP options should be made on +the incoming netdevice when the skb->dev is an l3mdev master. The route +lookup for the source route next hop also needs the incoming netdev. + +v2->v3: +- Simplify by passing the original netdevice down the stack (per David + Ahern). + +Signed-off-by: Stephen Suryaputra +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + include/net/ip.h | 2 +- + net/ipv4/ip_input.c | 7 +++---- + net/ipv4/ip_options.c | 4 ++-- + 3 files changed, 6 insertions(+), 7 deletions(-) + +diff --git a/include/net/ip.h b/include/net/ip.h +index be3cad9c2e4c..583526aad1d0 100644 +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -677,7 +677,7 @@ int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, + unsigned char __user *data, int optlen); + void ip_options_undo(struct ip_options *opt); + void ip_forward_options(struct sk_buff *skb); +-int ip_options_rcv_srr(struct sk_buff *skb); ++int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); + + /* + * Functions provided by ip_sockglue.c +diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c +index 1f4737b77067..ccf0d31b6ce5 100644 +--- a/net/ipv4/ip_input.c ++++ b/net/ipv4/ip_input.c +@@ -257,11 +257,10 @@ int ip_local_deliver(struct sk_buff *skb) + ip_local_deliver_finish); + } + +-static inline bool ip_rcv_options(struct sk_buff *skb) ++static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) + { + struct ip_options *opt; + const struct iphdr *iph; +- struct net_device *dev = skb->dev; + + /* It looks as overkill, because not all + IP options require packet mangling. 
+@@ -297,7 +296,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb) + } + } + +- if (ip_options_rcv_srr(skb)) ++ if (ip_options_rcv_srr(skb, dev)) + goto drop; + } + +@@ -353,7 +352,7 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, + } + #endif + +- if (iph->ihl > 5 && ip_rcv_options(skb)) ++ if (iph->ihl > 5 && ip_rcv_options(skb, dev)) + goto drop; + + rt = skb_rtable(skb); +diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c +index 32a35043c9f5..3db31bb9df50 100644 +--- a/net/ipv4/ip_options.c ++++ b/net/ipv4/ip_options.c +@@ -612,7 +612,7 @@ void ip_forward_options(struct sk_buff *skb) + } + } + +-int ip_options_rcv_srr(struct sk_buff *skb) ++int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) + { + struct ip_options *opt = &(IPCB(skb)->opt); + int srrspace, srrptr; +@@ -647,7 +647,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) + + orefdst = skb->_skb_refdst; + skb_dst_set(skb, NULL); +- err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); ++ err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev); + rt2 = skb_rtable(skb); + if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { + skb_dst_drop(skb); +-- +2.19.1 + -- 2.39.2
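To make the intent of the vrf change above concrete, here is a tiny userspace toy model; the types, fields and helper names are invented for illustration only, while the kernel fix itself simply passes the original ingress net_device down to ip_rcv_options() and ip_options_rcv_srr() as shown in the diff. The point is which device's accept_source_route setting gets consulted once skb->dev has been rewritten to the VRF (l3mdev) master.

/*
 * Toy model, not kernel code: which per-device policy should govern a
 * source-routed packet once skb->dev points at the VRF master?
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_netdev {
	const char *name;
	bool is_l3_master;		/* stand-in for netif_is_l3_master() */
	bool accept_source_route;	/* stand-in for the per-device sysctl */
};

struct toy_skb {
	struct toy_netdev *dev;		/* may already be the VRF master */
	struct toy_netdev *orig_dev;	/* the device the packet arrived on */
};

/* Pre-fix logic: policy read from skb->dev, i.e. the VRF master. */
static bool srr_allowed_old(const struct toy_skb *skb)
{
	return skb->dev->accept_source_route;
}

/* Post-fix logic: the original ingress device decides when skb->dev is a master. */
static bool srr_allowed_new(const struct toy_skb *skb)
{
	const struct toy_netdev *dev =
		skb->dev->is_l3_master ? skb->orig_dev : skb->dev;

	return dev->accept_source_route;
}

int main(void)
{
	struct toy_netdev vrf = { "vrf0", true, true };		/* example settings */
	struct toy_netdev eth = { "eth0", false, false };
	struct toy_skb skb = { &vrf, &eth };			/* skb->dev already rewritten */

	printf("old check consults %s: %s\n", skb.dev->name,
	       srr_allowed_old(&skb) ? "accept" : "drop");
	printf("new check consults %s: %s\n", skb.orig_dev->name,
	       srr_allowed_new(&skb) ? "accept" : "drop");
	return 0;
}

The same reasoning applies to the route lookup for the source-route next hop, which is why the hunk in ip_options_rcv_srr() above also switches ip_route_input() from skb->dev to the passed-in dev.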