From cfc0e9bf9365c2588bb4f0e5100304df24cded58 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 May 2018 20:11:42 +0200 Subject: [PATCH] 4.16-stable patches added patches: 3c59x-convert-to-generic-dma-api.patch cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch net-dsa-do-not-register-devlink-for-unused-ports.patch net-fix-a-bug-in-removing-queues-from-xps-map.patch net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch net-ip6_gre-fix-ip6erspan-hlen-calculation.patch net-ip6_gre-fix-tunnel-metadata-device-sharing.patch net-ip6_gre-request-headroom-in-__gre6_xmit.patch net-ip6_gre-split-up-ip6gre_changelink.patch net-ip6_gre-split-up-ip6gre_newlink.patch net-ip6_gre-split-up-ip6gre_tnl_change.patch net-ip6_gre-split-up-ip6gre_tnl_link_config.patch net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch net-sched-red-avoid-hashing-null-child.patch net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch net-test-tailroom-before-appending-to-linear-skb.patch packet-in-packet_snd-start-writing-at-link-layer-allocation.patch qed-fix-ll2-race-during-connection-terminate.patch qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch qed-ll2-flush-isles-when-connection-is-closed.patch sock_diag-fix-use-after-free-read-in-__sk_free.patch sparc-vio-use-put_device-instead-of-kfree.patch tcp-purge-write-queue-in-tcp_connect_init.patch tun-fix-use-after-free-for-ptr_ring.patch tuntap-fix-use-after-free-during-release.patch vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch vmxnet3-use-dma-memory-barriers-where-required.patch --- .../3c59x-convert-to-generic-dma-api.patch | 286 ++++++++++++++++++ 
...ple-mask-validation-for-hash-filters.patch | 137 +++++++++ ...set-in-collecting-tx-rate-limit-info.patch | 68 +++++ ...-bcm_sf2-fix-ipv6-rule-half-deletion.patch | 52 ++++ ...-bcm_sf2-fix-ipv6-rules-and-chain-id.patch | 83 +++++ ..._cls_loc_any-overwrite-for-last-rule.patch | 42 +++ ...ot-register-devlink-for-unused-ports.patch | 50 +++ ...-bug-in-removing-queues-from-xps-map.patch | 33 ++ ...oom-request-in-ip6erspan_tunnel_xmit.patch | 193 ++++++++++++ ...6_gre-fix-ip6erspan-hlen-calculation.patch | 151 +++++++++ ...e-fix-tunnel-metadata-device-sharing.patch | 248 +++++++++++++++ ..._gre-request-headroom-in-__gre6_xmit.patch | 155 ++++++++++ ...t-ip6_gre-split-up-ip6gre_changelink.patch | 87 ++++++ .../net-ip6_gre-split-up-ip6gre_newlink.patch | 74 +++++ ...t-ip6_gre-split-up-ip6gre_tnl_change.patch | 48 +++ ..._gre-split-up-ip6gre_tnl_link_config.patch | 98 ++++++ ...rror-handling-in-mlx4_init_port_info.patch | 48 +++ ...k-in-the-error-path-of-tcf_vlan_init.patch | 34 +++ ...t-sched-red-avoid-hashing-null-child.patch | 108 +++++++ ...ssing-nlattrs-in-smc_pnetid-messages.patch | 132 ++++++++ ...lroom-before-appending-to-linear-skb.patch | 54 ++++ ...art-writing-at-link-layer-allocation.patch | 56 ++++ ...ll2-race-during-connection-terminate.patch | 90 ++++++ ...f-list-corruption-during-rmmod-flows.patch | 92 ++++++ ...lush-isles-when-connection-is-closed.patch | 63 ++++ queue-4.16/series | 32 ++ ...fix-use-after-free-read-in-__sk_free.patch | 128 ++++++++ ...-vio-use-put_device-instead-of-kfree.patch | 31 ++ ...urge-write-queue-in-tcp_connect_init.patch | 88 ++++++ .../tun-fix-use-after-free-for-ptr_ring.patch | 116 +++++++ ...ap-fix-use-after-free-during-release.patch | 43 +++ ...k-before-the-first-dma-map-operation.patch | 153 ++++++++++ ...e-dma-memory-barriers-where-required.patch | 91 ++++++ 33 files changed, 3164 insertions(+) create mode 100644 queue-4.16/3c59x-convert-to-generic-dma-api.patch create mode 100644 
queue-4.16/cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch create mode 100644 queue-4.16/cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch create mode 100644 queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch create mode 100644 queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch create mode 100644 queue-4.16/net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch create mode 100644 queue-4.16/net-dsa-do-not-register-devlink-for-unused-ports.patch create mode 100644 queue-4.16/net-fix-a-bug-in-removing-queues-from-xps-map.patch create mode 100644 queue-4.16/net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch create mode 100644 queue-4.16/net-ip6_gre-fix-ip6erspan-hlen-calculation.patch create mode 100644 queue-4.16/net-ip6_gre-fix-tunnel-metadata-device-sharing.patch create mode 100644 queue-4.16/net-ip6_gre-request-headroom-in-__gre6_xmit.patch create mode 100644 queue-4.16/net-ip6_gre-split-up-ip6gre_changelink.patch create mode 100644 queue-4.16/net-ip6_gre-split-up-ip6gre_newlink.patch create mode 100644 queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_change.patch create mode 100644 queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_link_config.patch create mode 100644 queue-4.16/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch create mode 100644 queue-4.16/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch create mode 100644 queue-4.16/net-sched-red-avoid-hashing-null-child.patch create mode 100644 queue-4.16/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch create mode 100644 queue-4.16/net-test-tailroom-before-appending-to-linear-skb.patch create mode 100644 queue-4.16/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch create mode 100644 queue-4.16/qed-fix-ll2-race-during-connection-terminate.patch create mode 100644 queue-4.16/qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch create mode 100644 
queue-4.16/qed-ll2-flush-isles-when-connection-is-closed.patch create mode 100644 queue-4.16/sock_diag-fix-use-after-free-read-in-__sk_free.patch create mode 100644 queue-4.16/sparc-vio-use-put_device-instead-of-kfree.patch create mode 100644 queue-4.16/tcp-purge-write-queue-in-tcp_connect_init.patch create mode 100644 queue-4.16/tun-fix-use-after-free-for-ptr_ring.patch create mode 100644 queue-4.16/tuntap-fix-use-after-free-during-release.patch create mode 100644 queue-4.16/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch create mode 100644 queue-4.16/vmxnet3-use-dma-memory-barriers-where-required.patch diff --git a/queue-4.16/3c59x-convert-to-generic-dma-api.patch b/queue-4.16/3c59x-convert-to-generic-dma-api.patch new file mode 100644 index 00000000000..72b1607e2e9 --- /dev/null +++ b/queue-4.16/3c59x-convert-to-generic-dma-api.patch @@ -0,0 +1,286 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Christoph Hellwig +Date: Sat, 12 May 2018 12:16:50 +0200 +Subject: 3c59x: convert to generic DMA API + +From: Christoph Hellwig + +[ Upstream commit 55c82617c3e82210b7471e9334e8fc5df6a9961f ] + +This driver supports EISA devices in addition to PCI devices, and relied +on the legacy behavior of the pci_dma* shims to pass on a NULL pointer +to the DMA API, and the DMA API being able to handle that. When the +NULL forwarding broke the EISA support got broken. Fix this by converting +to the DMA API instead of the legacy PCI shims. + +Fixes: 4167b2ad ("PCI: Remove NULL device handling from PCI DMA API") +Reported-by: tedheadster +Tested-by: tedheadster +Signed-off-by: Christoph Hellwig +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/3com/3c59x.c | 104 ++++++++++++++++++-------------------- + 1 file changed, 51 insertions(+), 53 deletions(-) + +--- a/drivers/net/ethernet/3com/3c59x.c ++++ b/drivers/net/ethernet/3com/3c59x.c +@@ -1212,9 +1212,9 @@ static int vortex_probe1(struct device * + vp->mii.reg_num_mask = 0x1f; + + /* Makes sure rings are at least 16 byte aligned. */ +- vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE ++ vp->rx_ring = dma_alloc_coherent(gendev, sizeof(struct boom_rx_desc) * RX_RING_SIZE + + sizeof(struct boom_tx_desc) * TX_RING_SIZE, +- &vp->rx_ring_dma); ++ &vp->rx_ring_dma, GFP_KERNEL); + retval = -ENOMEM; + if (!vp->rx_ring) + goto free_device; +@@ -1476,11 +1476,10 @@ static int vortex_probe1(struct device * + return 0; + + free_ring: +- pci_free_consistent(pdev, +- sizeof(struct boom_rx_desc) * RX_RING_SIZE +- + sizeof(struct boom_tx_desc) * TX_RING_SIZE, +- vp->rx_ring, +- vp->rx_ring_dma); ++ dma_free_coherent(&pdev->dev, ++ sizeof(struct boom_rx_desc) * RX_RING_SIZE + ++ sizeof(struct boom_tx_desc) * TX_RING_SIZE, ++ vp->rx_ring, vp->rx_ring_dma); + free_device: + free_netdev(dev); + pr_err(PFX "vortex_probe1 fails. Returns %d\n", retval); +@@ -1751,9 +1750,9 @@ vortex_open(struct net_device *dev) + break; /* Bad news! */ + + skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */ +- dma = pci_map_single(VORTEX_PCI(vp), skb->data, +- PKT_BUF_SZ, PCI_DMA_FROMDEVICE); +- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma)) ++ dma = dma_map_single(vp->gendev, skb->data, ++ PKT_BUF_SZ, DMA_FROM_DEVICE); ++ if (dma_mapping_error(vp->gendev, dma)) + break; + vp->rx_ring[i].addr = cpu_to_le32(dma); + } +@@ -2067,9 +2066,9 @@ vortex_start_xmit(struct sk_buff *skb, s + if (vp->bus_master) { + /* Set the bus-master controller to transfer the packet. 
*/ + int len = (skb->len + 3) & ~3; +- vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len, +- PCI_DMA_TODEVICE); +- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) { ++ vp->tx_skb_dma = dma_map_single(vp->gendev, skb->data, len, ++ DMA_TO_DEVICE); ++ if (dma_mapping_error(vp->gendev, vp->tx_skb_dma)) { + dev_kfree_skb_any(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; +@@ -2168,9 +2167,9 @@ boomerang_start_xmit(struct sk_buff *skb + vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum); + + if (!skb_shinfo(skb)->nr_frags) { +- dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len, +- PCI_DMA_TODEVICE); +- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) ++ dma_addr = dma_map_single(vp->gendev, skb->data, skb->len, ++ DMA_TO_DEVICE); ++ if (dma_mapping_error(vp->gendev, dma_addr)) + goto out_dma_err; + + vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr); +@@ -2178,9 +2177,9 @@ boomerang_start_xmit(struct sk_buff *skb + } else { + int i; + +- dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, +- skb_headlen(skb), PCI_DMA_TODEVICE); +- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) ++ dma_addr = dma_map_single(vp->gendev, skb->data, ++ skb_headlen(skb), DMA_TO_DEVICE); ++ if (dma_mapping_error(vp->gendev, dma_addr)) + goto out_dma_err; + + vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr); +@@ -2189,21 +2188,21 @@ boomerang_start_xmit(struct sk_buff *skb + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + +- dma_addr = skb_frag_dma_map(&VORTEX_PCI(vp)->dev, frag, ++ dma_addr = skb_frag_dma_map(vp->gendev, frag, + 0, + frag->size, + DMA_TO_DEVICE); +- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) { ++ if (dma_mapping_error(vp->gendev, dma_addr)) { + for(i = i-1; i >= 0; i--) +- dma_unmap_page(&VORTEX_PCI(vp)->dev, ++ dma_unmap_page(vp->gendev, + le32_to_cpu(vp->tx_ring[entry].frag[i+1].addr), + 
le32_to_cpu(vp->tx_ring[entry].frag[i+1].length), + DMA_TO_DEVICE); + +- pci_unmap_single(VORTEX_PCI(vp), ++ dma_unmap_single(vp->gendev, + le32_to_cpu(vp->tx_ring[entry].frag[0].addr), + le32_to_cpu(vp->tx_ring[entry].frag[0].length), +- PCI_DMA_TODEVICE); ++ DMA_TO_DEVICE); + + goto out_dma_err; + } +@@ -2218,8 +2217,8 @@ boomerang_start_xmit(struct sk_buff *skb + } + } + #else +- dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len, PCI_DMA_TODEVICE); +- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) ++ dma_addr = dma_map_single(vp->gendev, skb->data, skb->len, DMA_TO_DEVICE); ++ if (dma_mapping_error(vp->gendev, dma_addr)) + goto out_dma_err; + vp->tx_ring[entry].addr = cpu_to_le32(dma_addr); + vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG); +@@ -2254,7 +2253,7 @@ boomerang_start_xmit(struct sk_buff *skb + out: + return NETDEV_TX_OK; + out_dma_err: +- dev_err(&VORTEX_PCI(vp)->dev, "Error mapping dma buffer\n"); ++ dev_err(vp->gendev, "Error mapping dma buffer\n"); + goto out; + } + +@@ -2322,7 +2321,7 @@ vortex_interrupt(int irq, void *dev_id) + if (status & DMADone) { + if (ioread16(ioaddr + Wn7_MasterStatus) & 0x1000) { + iowrite16(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. 
*/ +- pci_unmap_single(VORTEX_PCI(vp), vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE); ++ dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE); + pkts_compl++; + bytes_compl += vp->tx_skb->len; + dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */ +@@ -2459,19 +2458,19 @@ boomerang_interrupt(int irq, void *dev_i + struct sk_buff *skb = vp->tx_skbuff[entry]; + #if DO_ZEROCOPY + int i; +- pci_unmap_single(VORTEX_PCI(vp), ++ dma_unmap_single(vp->gendev, + le32_to_cpu(vp->tx_ring[entry].frag[0].addr), + le32_to_cpu(vp->tx_ring[entry].frag[0].length)&0xFFF, +- PCI_DMA_TODEVICE); ++ DMA_TO_DEVICE); + + for (i=1; i<=skb_shinfo(skb)->nr_frags; i++) +- pci_unmap_page(VORTEX_PCI(vp), ++ dma_unmap_page(vp->gendev, + le32_to_cpu(vp->tx_ring[entry].frag[i].addr), + le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF, +- PCI_DMA_TODEVICE); ++ DMA_TO_DEVICE); + #else +- pci_unmap_single(VORTEX_PCI(vp), +- le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE); ++ dma_unmap_single(vp->gendev, ++ le32_to_cpu(vp->tx_ring[entry].addr), skb->len, DMA_TO_DEVICE); + #endif + pkts_compl++; + bytes_compl += skb->len; +@@ -2561,14 +2560,14 @@ static int vortex_rx(struct net_device * + /* 'skb_put()' points to the start of sk_buff data area. */ + if (vp->bus_master && + ! 
(ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)) { +- dma_addr_t dma = pci_map_single(VORTEX_PCI(vp), skb_put(skb, pkt_len), +- pkt_len, PCI_DMA_FROMDEVICE); ++ dma_addr_t dma = dma_map_single(vp->gendev, skb_put(skb, pkt_len), ++ pkt_len, DMA_FROM_DEVICE); + iowrite32(dma, ioaddr + Wn7_MasterAddr); + iowrite16((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen); + iowrite16(StartDMAUp, ioaddr + EL3_CMD); + while (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000) + ; +- pci_unmap_single(VORTEX_PCI(vp), dma, pkt_len, PCI_DMA_FROMDEVICE); ++ dma_unmap_single(vp->gendev, dma, pkt_len, DMA_FROM_DEVICE); + } else { + ioread32_rep(ioaddr + RX_FIFO, + skb_put(skb, pkt_len), +@@ -2635,11 +2634,11 @@ boomerang_rx(struct net_device *dev) + if (pkt_len < rx_copybreak && + (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { + skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ +- pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ dma_sync_single_for_cpu(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE); + /* 'skb_put()' points to the start of sk_buff data area. */ + skb_put_data(skb, vp->rx_skbuff[entry]->data, + pkt_len); +- pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ dma_sync_single_for_device(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE); + vp->rx_copy++; + } else { + /* Pre-allocate the replacement skb. 
If it or its +@@ -2651,9 +2650,9 @@ boomerang_rx(struct net_device *dev) + dev->stats.rx_dropped++; + goto clear_complete; + } +- newdma = pci_map_single(VORTEX_PCI(vp), newskb->data, +- PKT_BUF_SZ, PCI_DMA_FROMDEVICE); +- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) { ++ newdma = dma_map_single(vp->gendev, newskb->data, ++ PKT_BUF_SZ, DMA_FROM_DEVICE); ++ if (dma_mapping_error(vp->gendev, newdma)) { + dev->stats.rx_dropped++; + consume_skb(newskb); + goto clear_complete; +@@ -2664,7 +2663,7 @@ boomerang_rx(struct net_device *dev) + vp->rx_skbuff[entry] = newskb; + vp->rx_ring[entry].addr = cpu_to_le32(newdma); + skb_put(skb, pkt_len); +- pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ dma_unmap_single(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE); + vp->rx_nocopy++; + } + skb->protocol = eth_type_trans(skb, dev); +@@ -2761,8 +2760,8 @@ vortex_close(struct net_device *dev) + if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */ + for (i = 0; i < RX_RING_SIZE; i++) + if (vp->rx_skbuff[i]) { +- pci_unmap_single( VORTEX_PCI(vp), le32_to_cpu(vp->rx_ring[i].addr), +- PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ dma_unmap_single(vp->gendev, le32_to_cpu(vp->rx_ring[i].addr), ++ PKT_BUF_SZ, DMA_FROM_DEVICE); + dev_kfree_skb(vp->rx_skbuff[i]); + vp->rx_skbuff[i] = NULL; + } +@@ -2775,12 +2774,12 @@ vortex_close(struct net_device *dev) + int k; + + for (k=0; k<=skb_shinfo(skb)->nr_frags; k++) +- pci_unmap_single(VORTEX_PCI(vp), ++ dma_unmap_single(vp->gendev, + le32_to_cpu(vp->tx_ring[i].frag[k].addr), + le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF, +- PCI_DMA_TODEVICE); ++ DMA_TO_DEVICE); + #else +- pci_unmap_single(VORTEX_PCI(vp), le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE); ++ dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, DMA_TO_DEVICE); + #endif + dev_kfree_skb(skb); + vp->tx_skbuff[i] = NULL; +@@ -3288,11 +3287,10 @@ static void vortex_remove_one(struct pci + + 
pci_iounmap(pdev, vp->ioaddr); + +- pci_free_consistent(pdev, +- sizeof(struct boom_rx_desc) * RX_RING_SIZE +- + sizeof(struct boom_tx_desc) * TX_RING_SIZE, +- vp->rx_ring, +- vp->rx_ring_dma); ++ dma_free_coherent(&pdev->dev, ++ sizeof(struct boom_rx_desc) * RX_RING_SIZE + ++ sizeof(struct boom_tx_desc) * TX_RING_SIZE, ++ vp->rx_ring, vp->rx_ring_dma); + + pci_release_regions(pdev); + diff --git a/queue-4.16/cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch b/queue-4.16/cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch new file mode 100644 index 00000000000..90a40330b81 --- /dev/null +++ b/queue-4.16/cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch @@ -0,0 +1,137 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Kumar Sanghvi +Date: Mon, 14 May 2018 16:27:34 +0530 +Subject: cxgb4: Correct ntuple mask validation for hash filters + +From: Kumar Sanghvi + +[ Upstream commit 849a742c59a3d597473c0232f9c2506c69eeef14 ] + +Earlier code of doing bitwise AND with field width bits was wrong. +Instead, simplify code to calculate ntuple_mask based on supplied +fields and then compare with mask configured in hw - which is the +correct and simpler way to validate ntuple mask. + +Fixes: 3eb8b62d5a26 ("cxgb4: add support to create hash-filters via tc-flower offload") +Signed-off-by: Kumar Sanghvi +Signed-off-by: Ganesh Goudar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 88 +++++++--------------- + 1 file changed, 30 insertions(+), 58 deletions(-) + +--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +@@ -836,7 +836,7 @@ bool is_filter_exact_match(struct adapte + { + struct tp_params *tp = &adap->params.tp; + u64 hash_filter_mask = tp->hash_filter_mask; +- u32 mask; ++ u64 ntuple_mask = 0; + + if (!is_hashfilter(adap)) + return false; +@@ -865,73 +865,45 @@ bool is_filter_exact_match(struct adapte + if (!fs->val.fport || fs->mask.fport != 0xffff) + return false; + +- if (tp->fcoe_shift >= 0) { +- mask = (hash_filter_mask >> tp->fcoe_shift) & FT_FCOE_W; +- if (mask && !fs->mask.fcoe) +- return false; +- } ++ /* calculate tuple mask and compare with mask configured in hw */ ++ if (tp->fcoe_shift >= 0) ++ ntuple_mask |= (u64)fs->mask.fcoe << tp->fcoe_shift; + +- if (tp->port_shift >= 0) { +- mask = (hash_filter_mask >> tp->port_shift) & FT_PORT_W; +- if (mask && !fs->mask.iport) +- return false; +- } ++ if (tp->port_shift >= 0) ++ ntuple_mask |= (u64)fs->mask.iport << tp->port_shift; + + if (tp->vnic_shift >= 0) { +- mask = (hash_filter_mask >> tp->vnic_shift) & FT_VNIC_ID_W; +- +- if ((adap->params.tp.ingress_config & VNIC_F)) { +- if (mask && !fs->mask.pfvf_vld) +- return false; +- } else { +- if (mask && !fs->mask.ovlan_vld) +- return false; +- } ++ if ((adap->params.tp.ingress_config & VNIC_F)) ++ ntuple_mask |= (u64)fs->mask.pfvf_vld << tp->vnic_shift; ++ else ++ ntuple_mask |= (u64)fs->mask.ovlan_vld << ++ tp->vnic_shift; + } + +- if (tp->vlan_shift >= 0) { +- mask = (hash_filter_mask >> tp->vlan_shift) & FT_VLAN_W; +- if (mask && !fs->mask.ivlan) +- return false; +- } ++ if (tp->vlan_shift >= 0) ++ ntuple_mask |= (u64)fs->mask.ivlan << tp->vlan_shift; + +- if (tp->tos_shift >= 0) { +- mask = (hash_filter_mask >> tp->tos_shift) & FT_TOS_W; +- if (mask && !fs->mask.tos) 
+- return false; +- } ++ if (tp->tos_shift >= 0) ++ ntuple_mask |= (u64)fs->mask.tos << tp->tos_shift; + +- if (tp->protocol_shift >= 0) { +- mask = (hash_filter_mask >> tp->protocol_shift) & FT_PROTOCOL_W; +- if (mask && !fs->mask.proto) +- return false; +- } ++ if (tp->protocol_shift >= 0) ++ ntuple_mask |= (u64)fs->mask.proto << tp->protocol_shift; + +- if (tp->ethertype_shift >= 0) { +- mask = (hash_filter_mask >> tp->ethertype_shift) & +- FT_ETHERTYPE_W; +- if (mask && !fs->mask.ethtype) +- return false; +- } ++ if (tp->ethertype_shift >= 0) ++ ntuple_mask |= (u64)fs->mask.ethtype << tp->ethertype_shift; + +- if (tp->macmatch_shift >= 0) { +- mask = (hash_filter_mask >> tp->macmatch_shift) & FT_MACMATCH_W; +- if (mask && !fs->mask.macidx) +- return false; +- } ++ if (tp->macmatch_shift >= 0) ++ ntuple_mask |= (u64)fs->mask.macidx << tp->macmatch_shift; ++ ++ if (tp->matchtype_shift >= 0) ++ ntuple_mask |= (u64)fs->mask.matchtype << tp->matchtype_shift; ++ ++ if (tp->frag_shift >= 0) ++ ntuple_mask |= (u64)fs->mask.frag << tp->frag_shift; ++ ++ if (ntuple_mask != hash_filter_mask) ++ return false; + +- if (tp->matchtype_shift >= 0) { +- mask = (hash_filter_mask >> tp->matchtype_shift) & +- FT_MPSHITTYPE_W; +- if (mask && !fs->mask.matchtype) +- return false; +- } +- if (tp->frag_shift >= 0) { +- mask = (hash_filter_mask >> tp->frag_shift) & +- FT_FRAGMENTATION_W; +- if (mask && !fs->mask.frag) +- return false; +- } + return true; + } + diff --git a/queue-4.16/cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch b/queue-4.16/cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch new file mode 100644 index 00000000000..8d744835952 --- /dev/null +++ b/queue-4.16/cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch @@ -0,0 +1,68 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Rahul Lakkireddy +Date: Fri, 18 May 2018 19:13:37 +0530 +Subject: cxgb4: fix offset in collecting TX rate limit info + +From: Rahul Lakkireddy + +[ Upstream commit 
d775f26b295a0a303f7a73d7da46e04296484fe7 ] + +Correct the indirect register offsets in collecting TX rate limit info +in UP CIM logs. + +Also, T5 doesn't support these indirect register offsets, so remove +them from collection logic. + +Fixes: be6e36d916b1 ("cxgb4: collect TX rate limit info in UP CIM logs") +Signed-off-by: Rahul Lakkireddy +Signed-off-by: Ganesh Goudar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h | 28 +++++++--------------- + 1 file changed, 9 insertions(+), 19 deletions(-) + +--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h ++++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +@@ -419,15 +419,15 @@ static const u32 t6_up_cim_reg_array[][I + {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */ + {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */ + {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */ +- {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */ +- {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */ +- {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */ +- {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */ +- {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */ +- {0x7b50, 0x7b54, 0x2920, 0x10, 0x10}, /* up_cim_2920_to_2a10 */ +- {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2a14 */ +- {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */ +- {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */ ++ {0x7b50, 0x7b54, 0x4900, 0x4, 0x4}, /* up_cim_4900_to_4c60 */ ++ {0x7b50, 0x7b54, 0x4904, 0x4, 0x4}, /* up_cim_4904_to_4c64 */ ++ {0x7b50, 0x7b54, 0x4908, 0x4, 0x4}, /* up_cim_4908_to_4c68 */ ++ {0x7b50, 0x7b54, 0x4910, 0x4, 0x4}, /* up_cim_4910_to_4c70 */ ++ {0x7b50, 0x7b54, 0x4914, 0x4, 0x4}, /* up_cim_4914_to_4c74 */ ++ {0x7b50, 0x7b54, 0x4920, 0x10, 0x10}, /* up_cim_4920_to_4a10 */ ++ {0x7b50, 0x7b54, 0x4924, 0x10, 0x10}, /* up_cim_4924_to_4a14 */ ++ {0x7b50, 
0x7b54, 0x4928, 0x10, 0x10}, /* up_cim_4928_to_4a18 */ ++ {0x7b50, 0x7b54, 0x492c, 0x10, 0x10}, /* up_cim_492c_to_4a1c */ + }; + + static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { +@@ -444,16 +444,6 @@ static const u32 t5_up_cim_reg_array[][I + {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */ + {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */ + {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */ +- {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */ +- {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */ +- {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */ +- {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */ +- {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */ +- {0x7b50, 0x7b54, 0x2918, 0x4, 0x4}, /* up_cim_2918_to_3d54 */ +- {0x7b50, 0x7b54, 0x291c, 0x4, 0x4}, /* up_cim_291c_to_3d58 */ +- {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2914 */ +- {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */ +- {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */ + }; + + static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = { diff --git a/queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch b/queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch new file mode 100644 index 00000000000..b10c7d2670a --- /dev/null +++ b/queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch @@ -0,0 +1,52 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Florian Fainelli +Date: Tue, 15 May 2018 16:01:25 -0700 +Subject: net: dsa: bcm_sf2: Fix IPv6 rule half deletion + +From: Florian Fainelli + +[ Upstream commit 1942adf64214df370350aa46954ba27654456f68 ] + +It was possible to delete only one half of an IPv6 rule, which would leave +the second half still programmed and possibly in use. Instead of +checking for the unused bitmap, we need to check the unique bitmap, and +refuse any deletion that does not match that criteria. 
We also need to +move that check from bcm_sf2_cfp_rule_del_one() into its caller: +bcm_sf2_cfp_rule_del() otherwise we would not be able to delete second +halves anymore that would not pass the first test. + +Fixes: ba0696c22e7c ("net: dsa: bcm_sf2: Add support for IPv6 CFP rules") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2_cfp.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2_cfp.c ++++ b/drivers/net/dsa/bcm_sf2_cfp.c +@@ -790,10 +790,6 @@ static int bcm_sf2_cfp_rule_del_one(stru + int ret; + u32 reg; + +- /* Refuse deletion of unused rules, and the default reserved rule */ +- if (!test_bit(loc, priv->cfp.used) || loc == 0) +- return -EINVAL; +- + /* Indicate which rule we want to read */ + bcm_sf2_cfp_rule_addr_set(priv, loc); + +@@ -831,6 +827,13 @@ static int bcm_sf2_cfp_rule_del(struct b + u32 next_loc = 0; + int ret; + ++ /* Refuse deleting unused rules, and those that are not unique since ++ * that could leave IPv6 rules with one of the chained rule in the ++ * table. 
++ */ ++ if (!test_bit(loc, priv->cfp.unique) || loc == 0) ++ return -EINVAL; ++ + ret = bcm_sf2_cfp_rule_del_one(priv, port, loc, &next_loc); + if (ret) + return ret; diff --git a/queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch b/queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch new file mode 100644 index 00000000000..a935dc3104a --- /dev/null +++ b/queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch @@ -0,0 +1,83 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Florian Fainelli +Date: Tue, 15 May 2018 16:01:24 -0700 +Subject: net: dsa: bcm_sf2: Fix IPv6 rules and chain ID + +From: Florian Fainelli + +[ Upstream commit 6c05561c541843b2bec2189f680bed6d20afc25b ] + +We had several issues that would make the programming of IPv6 rules both +inconsistent and error prone: + +- the chain ID that we would be asking the hardware to put in the + packet's Broadcom tag would be off by one, it would return one of the + two indexes, but not the one user-space specified + +- when a user specified a particular location to insert a CFP rule at, + we would not be returning the same index, which would be confusing if + nothing else + +- finally, like IPv4, it would be possible to overflow the last entry by + re-programming it + +Fix this by swapping the usage of rule_index[0] and rule_index[1] where +relevant in order to return a consistent and correct user-space +experience. + +Fixes: ba0696c22e7c ("net: dsa: bcm_sf2: Add support for IPv6 CFP rules") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2_cfp.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2_cfp.c ++++ b/drivers/net/dsa/bcm_sf2_cfp.c +@@ -565,19 +565,21 @@ static int bcm_sf2_cfp_ipv6_rule_set(str + * first half because the HW search is by incrementing addresses. 
+ */ + if (fs->location == RX_CLS_LOC_ANY) +- rule_index[0] = find_first_zero_bit(priv->cfp.used, +- bcm_sf2_cfp_rule_size(priv)); ++ rule_index[1] = find_first_zero_bit(priv->cfp.used, ++ priv->num_cfp_rules); + else +- rule_index[0] = fs->location; ++ rule_index[1] = fs->location; ++ if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) ++ return -ENOSPC; + + /* Flag it as used (cleared on error path) such that we can immediately + * obtain a second one to chain from. + */ +- set_bit(rule_index[0], priv->cfp.used); ++ set_bit(rule_index[1], priv->cfp.used); + +- rule_index[1] = find_first_zero_bit(priv->cfp.used, +- bcm_sf2_cfp_rule_size(priv)); +- if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) { ++ rule_index[0] = find_first_zero_bit(priv->cfp.used, ++ priv->num_cfp_rules); ++ if (rule_index[0] > bcm_sf2_cfp_rule_size(priv)) { + ret = -ENOSPC; + goto out_err; + } +@@ -715,14 +717,14 @@ static int bcm_sf2_cfp_ipv6_rule_set(str + /* Flag the second half rule as being used now, return it as the + * location, and flag it as unique while dumping rules + */ +- set_bit(rule_index[1], priv->cfp.used); ++ set_bit(rule_index[0], priv->cfp.used); + set_bit(rule_index[1], priv->cfp.unique); + fs->location = rule_index[1]; + + return ret; + + out_err: +- clear_bit(rule_index[0], priv->cfp.used); ++ clear_bit(rule_index[1], priv->cfp.used); + return ret; + } + diff --git a/queue-4.16/net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch b/queue-4.16/net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch new file mode 100644 index 00000000000..c20f7797366 --- /dev/null +++ b/queue-4.16/net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch @@ -0,0 +1,42 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Florian Fainelli +Date: Tue, 15 May 2018 16:01:23 -0700 +Subject: net: dsa: bcm_sf2: Fix RX_CLS_LOC_ANY overwrite for last rule + +From: Florian Fainelli + +[ Upstream commit 43a5e00f38fe8933a1c716bfe5b30e97f749d94b ] + +When we let the kernel 
pick up a rule location with RX_CLS_LOC_ANY, we +would be able to overwrite the last rules because of a number of issues. + +The IPv4 code path would not be checking that rule_index is within +bounds, and it would also only be allowed to pick up rules from range +0..126 instead of the full 0..127 range. This would lead us to allow +overwriting the last rule when we let the kernel pick-up the location. + +Fixes: 3306145866b6 ("net: dsa: bcm_sf2: Move IPv4 CFP processing to specific functions") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2_cfp.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/dsa/bcm_sf2_cfp.c ++++ b/drivers/net/dsa/bcm_sf2_cfp.c +@@ -354,10 +354,13 @@ static int bcm_sf2_cfp_ipv4_rule_set(str + /* Locate the first rule available */ + if (fs->location == RX_CLS_LOC_ANY) + rule_index = find_first_zero_bit(priv->cfp.used, +- bcm_sf2_cfp_rule_size(priv)); ++ priv->num_cfp_rules); + else + rule_index = fs->location; + ++ if (rule_index > bcm_sf2_cfp_rule_size(priv)) ++ return -ENOSPC; ++ + layout = &udf_tcpip4_layout; + /* We only use one UDF slice for now */ + slice_num = bcm_sf2_get_slice_number(layout, 0); diff --git a/queue-4.16/net-dsa-do-not-register-devlink-for-unused-ports.patch b/queue-4.16/net-dsa-do-not-register-devlink-for-unused-ports.patch new file mode 100644 index 00000000000..9eb5019169a --- /dev/null +++ b/queue-4.16/net-dsa-do-not-register-devlink-for-unused-ports.patch @@ -0,0 +1,50 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Florian Fainelli +Date: Thu, 17 May 2018 16:55:39 -0700 +Subject: net: dsa: Do not register devlink for unused ports + +From: Florian Fainelli + +[ Upstream commit 5447d78623da2eded06d4cd9469d1a71eba43bc4 ] + +Even if commit 1d27732f411d ("net: dsa: setup and teardown ports") indicated +that registering a devlink instance for unused ports is not a problem, and this +is true, this can be 
confusing nonetheless, so let's not do it. + +Fixes: 1d27732f411d ("net: dsa: setup and teardown ports") +Reported-by: Jiri Pirko +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/dsa2.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/net/dsa/dsa2.c ++++ b/net/dsa/dsa2.c +@@ -258,11 +258,13 @@ static void dsa_tree_teardown_default_cp + static int dsa_port_setup(struct dsa_port *dp) + { + struct dsa_switch *ds = dp->ds; +- int err; ++ int err = 0; + + memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); + +- err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index); ++ if (dp->type != DSA_PORT_TYPE_UNUSED) ++ err = devlink_port_register(ds->devlink, &dp->devlink_port, ++ dp->index); + if (err) + return err; + +@@ -293,7 +295,8 @@ static int dsa_port_setup(struct dsa_por + + static void dsa_port_teardown(struct dsa_port *dp) + { +- devlink_port_unregister(&dp->devlink_port); ++ if (dp->type != DSA_PORT_TYPE_UNUSED) ++ devlink_port_unregister(&dp->devlink_port); + + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: diff --git a/queue-4.16/net-fix-a-bug-in-removing-queues-from-xps-map.patch b/queue-4.16/net-fix-a-bug-in-removing-queues-from-xps-map.patch new file mode 100644 index 00000000000..12ee6808265 --- /dev/null +++ b/queue-4.16/net-fix-a-bug-in-removing-queues-from-xps-map.patch @@ -0,0 +1,33 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Amritha Nambiar +Date: Thu, 17 May 2018 14:50:44 -0700 +Subject: net: Fix a bug in removing queues from XPS map + +From: Amritha Nambiar + +[ Upstream commit 6358d49ac23995fdfe157cc8747ab0f274d3954b ] + +While removing queues from the XPS map, the individual CPU ID +alone was used to index the CPUs map, this should be changed to also +factor in the traffic class mapping for the CPU-to-queue lookup. 
+ +Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes") +Signed-off-by: Amritha Nambiar +Acked-by: Alexander Duyck +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2097,7 +2097,7 @@ static bool remove_xps_queue_cpu(struct + int i, j; + + for (i = count, j = offset; i--; j++) { +- if (!remove_xps_queue(dev_maps, cpu, j)) ++ if (!remove_xps_queue(dev_maps, tci, j)) + break; + } + diff --git a/queue-4.16/net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch b/queue-4.16/net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch new file mode 100644 index 00000000000..e4e9fa44fb6 --- /dev/null +++ b/queue-4.16/net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch @@ -0,0 +1,193 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Petr Machata +Date: Thu, 17 May 2018 16:36:15 +0200 +Subject: net: ip6_gre: Fix headroom request in ip6erspan_tunnel_xmit() + +From: Petr Machata + +[ Upstream commit 5691484df961aff897d824bcc26cd1a2aa036b5b ] + +dev->needed_headroom is not primed until ip6_tnl_xmit(), so it starts +out zero. Thus the call to skb_cow_head() fails to actually make sure +there's enough headroom to push the ERSPAN headers to. That can lead to +the panic cited below. (Reproducer below that). + +Fix by requesting either needed_headroom if already primed, or just the +bare minimum needed for the header otherwise. + +[ 190.703567] kernel BUG at net/core/skbuff.c:104! +[ 190.708384] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI +[ 190.714007] Modules linked in: act_mirred cls_matchall ip6_gre ip6_tunnel tunnel6 gre sch_ingress vrf veth x86_pkg_temp_thermal mlx_platform nfsd e1000e leds_mlxcpld +[ 190.728975] CPU: 1 PID: 959 Comm: kworker/1:2 Not tainted 4.17.0-rc4-net_master-custom-139 #10 +[ 190.737647] Hardware name: Mellanox Technologies Ltd. 
"MSN2410-CB2F"/"SA000874", BIOS 4.6.5 03/08/2016 +[ 190.747006] Workqueue: ipv6_addrconf addrconf_dad_work +[ 190.752222] RIP: 0010:skb_panic+0xc3/0x100 +[ 190.756358] RSP: 0018:ffff8801d54072f0 EFLAGS: 00010282 +[ 190.761629] RAX: 0000000000000085 RBX: ffff8801c1a8ecc0 RCX: 0000000000000000 +[ 190.768830] RDX: 0000000000000085 RSI: dffffc0000000000 RDI: ffffed003aa80e54 +[ 190.776025] RBP: ffff8801bd1ec5a0 R08: ffffed003aabce19 R09: ffffed003aabce19 +[ 190.783226] R10: 0000000000000001 R11: ffffed003aabce18 R12: ffff8801bf695dbe +[ 190.790418] R13: 0000000000000084 R14: 00000000000006c0 R15: ffff8801bf695dc8 +[ 190.797621] FS: 0000000000000000(0000) GS:ffff8801d5400000(0000) knlGS:0000000000000000 +[ 190.805786] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 190.811582] CR2: 000055fa929aced0 CR3: 0000000003228004 CR4: 00000000001606e0 +[ 190.818790] Call Trace: +[ 190.821264] +[ 190.823314] ? ip6erspan_tunnel_xmit+0x5e4/0x1982 [ip6_gre] +[ 190.828940] ? ip6erspan_tunnel_xmit+0x5e4/0x1982 [ip6_gre] +[ 190.834562] skb_push+0x78/0x90 +[ 190.837749] ip6erspan_tunnel_xmit+0x5e4/0x1982 [ip6_gre] +[ 190.843219] ? ip6gre_tunnel_ioctl+0xd90/0xd90 [ip6_gre] +[ 190.848577] ? debug_check_no_locks_freed+0x210/0x210 +[ 190.853679] ? debug_check_no_locks_freed+0x210/0x210 +[ 190.858783] ? print_irqtrace_events+0x120/0x120 +[ 190.863451] ? sched_clock_cpu+0x18/0x210 +[ 190.867496] ? cyc2ns_read_end+0x10/0x10 +[ 190.871474] ? skb_network_protocol+0x76/0x200 +[ 190.875977] dev_hard_start_xmit+0x137/0x770 +[ 190.880317] ? do_raw_spin_trylock+0x6d/0xa0 +[ 190.884624] sch_direct_xmit+0x2ef/0x5d0 +[ 190.888589] ? pfifo_fast_dequeue+0x3fa/0x670 +[ 190.892994] ? pfifo_fast_change_tx_queue_len+0x810/0x810 +[ 190.898455] ? __lock_is_held+0xa0/0x160 +[ 190.902422] __qdisc_run+0x39e/0xfc0 +[ 190.906041] ? _raw_spin_unlock+0x29/0x40 +[ 190.910090] ? pfifo_fast_enqueue+0x24b/0x3e0 +[ 190.914501] ? sch_direct_xmit+0x5d0/0x5d0 +[ 190.918658] ? pfifo_fast_dequeue+0x670/0x670 +[ 190.923047] ? 
__dev_queue_xmit+0x172/0x1770 +[ 190.927365] ? preempt_count_sub+0xf/0xd0 +[ 190.931421] __dev_queue_xmit+0x410/0x1770 +[ 190.935553] ? ___slab_alloc+0x605/0x930 +[ 190.939524] ? print_irqtrace_events+0x120/0x120 +[ 190.944186] ? memcpy+0x34/0x50 +[ 190.947364] ? netdev_pick_tx+0x1c0/0x1c0 +[ 190.951428] ? __skb_clone+0x2fd/0x3d0 +[ 190.955218] ? __copy_skb_header+0x270/0x270 +[ 190.959537] ? rcu_read_lock_sched_held+0x93/0xa0 +[ 190.964282] ? kmem_cache_alloc+0x344/0x4d0 +[ 190.968520] ? cyc2ns_read_end+0x10/0x10 +[ 190.972495] ? skb_clone+0x123/0x230 +[ 190.976112] ? skb_split+0x820/0x820 +[ 190.979747] ? tcf_mirred+0x554/0x930 [act_mirred] +[ 190.984582] tcf_mirred+0x554/0x930 [act_mirred] +[ 190.989252] ? tcf_mirred_act_wants_ingress.part.2+0x10/0x10 [act_mirred] +[ 190.996109] ? __lock_acquire+0x706/0x26e0 +[ 191.000239] ? sched_clock_cpu+0x18/0x210 +[ 191.004294] tcf_action_exec+0xcf/0x2a0 +[ 191.008179] tcf_classify+0xfa/0x340 +[ 191.011794] __netif_receive_skb_core+0x8e1/0x1c60 +[ 191.016630] ? debug_check_no_locks_freed+0x210/0x210 +[ 191.021732] ? nf_ingress+0x500/0x500 +[ 191.025458] ? process_backlog+0x347/0x4b0 +[ 191.029619] ? print_irqtrace_events+0x120/0x120 +[ 191.034302] ? lock_acquire+0xd8/0x320 +[ 191.038089] ? process_backlog+0x1b6/0x4b0 +[ 191.042246] ? process_backlog+0xc2/0x4b0 +[ 191.046303] process_backlog+0xc2/0x4b0 +[ 191.050189] net_rx_action+0x5cc/0x980 +[ 191.053991] ? napi_complete_done+0x2c0/0x2c0 +[ 191.058386] ? mark_lock+0x13d/0xb40 +[ 191.062001] ? clockevents_program_event+0x6b/0x1d0 +[ 191.066922] ? print_irqtrace_events+0x120/0x120 +[ 191.071593] ? __lock_is_held+0xa0/0x160 +[ 191.075566] __do_softirq+0x1d4/0x9d2 +[ 191.079282] ? ip6_finish_output2+0x524/0x1460 +[ 191.083771] do_softirq_own_stack+0x2a/0x40 +[ 191.087994] +[ 191.090130] do_softirq.part.13+0x38/0x40 +[ 191.094178] __local_bh_enable_ip+0x135/0x190 +[ 191.098591] ip6_finish_output2+0x54d/0x1460 +[ 191.102916] ? ip6_forward_finish+0x2f0/0x2f0 +[ 191.107314] ? 
ip6_mtu+0x3c/0x2c0 +[ 191.110674] ? ip6_finish_output+0x2f8/0x650 +[ 191.114992] ? ip6_output+0x12a/0x500 +[ 191.118696] ip6_output+0x12a/0x500 +[ 191.122223] ? ip6_route_dev_notify+0x5b0/0x5b0 +[ 191.126807] ? ip6_finish_output+0x650/0x650 +[ 191.131120] ? ip6_fragment+0x1a60/0x1a60 +[ 191.135182] ? icmp6_dst_alloc+0x26e/0x470 +[ 191.139317] mld_sendpack+0x672/0x830 +[ 191.143021] ? igmp6_mcf_seq_next+0x2f0/0x2f0 +[ 191.147429] ? __local_bh_enable_ip+0x77/0x190 +[ 191.151913] ipv6_mc_dad_complete+0x47/0x90 +[ 191.156144] addrconf_dad_completed+0x561/0x720 +[ 191.160731] ? addrconf_rs_timer+0x3a0/0x3a0 +[ 191.165036] ? mark_held_locks+0xc9/0x140 +[ 191.169095] ? __local_bh_enable_ip+0x77/0x190 +[ 191.173570] ? addrconf_dad_work+0x50d/0xa20 +[ 191.177886] ? addrconf_dad_work+0x529/0xa20 +[ 191.182194] addrconf_dad_work+0x529/0xa20 +[ 191.186342] ? addrconf_dad_completed+0x720/0x720 +[ 191.191088] ? __lock_is_held+0xa0/0x160 +[ 191.195059] ? process_one_work+0x45d/0xe20 +[ 191.199302] ? process_one_work+0x51e/0xe20 +[ 191.203531] ? rcu_read_lock_sched_held+0x93/0xa0 +[ 191.208279] process_one_work+0x51e/0xe20 +[ 191.212340] ? pwq_dec_nr_in_flight+0x200/0x200 +[ 191.216912] ? get_lock_stats+0x4b/0xf0 +[ 191.220788] ? preempt_count_sub+0xf/0xd0 +[ 191.224844] ? worker_thread+0x219/0x860 +[ 191.228823] ? do_raw_spin_trylock+0x6d/0xa0 +[ 191.233142] worker_thread+0xeb/0x860 +[ 191.236848] ? process_one_work+0xe20/0xe20 +[ 191.241095] kthread+0x206/0x300 +[ 191.244352] ? process_one_work+0xe20/0xe20 +[ 191.248587] ? 
kthread_stop+0x570/0x570 +[ 191.252459] ret_from_fork+0x3a/0x50 +[ 191.256082] Code: 14 3e ff 8b 4b 78 55 4d 89 f9 41 56 41 55 48 c7 c7 a0 cf db 82 41 54 44 8b 44 24 2c 48 8b 54 24 30 48 8b 74 24 20 e8 16 94 13 ff <0f> 0b 48 c7 c7 60 8e 1f 85 48 83 c4 20 e8 55 ef a6 ff 89 74 24 +[ 191.275327] RIP: skb_panic+0xc3/0x100 RSP: ffff8801d54072f0 +[ 191.281024] ---[ end trace 7ea51094e099e006 ]--- +[ 191.285724] Kernel panic - not syncing: Fatal exception in interrupt +[ 191.292168] Kernel Offset: disabled +[ 191.295697] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- + +Reproducer: + + ip link add h1 type veth peer name swp1 + ip link add h3 type veth peer name swp3 + + ip link set dev h1 up + ip address add 192.0.2.1/28 dev h1 + + ip link add dev vh3 type vrf table 20 + ip link set dev h3 master vh3 + ip link set dev vh3 up + ip link set dev h3 up + + ip link set dev swp3 up + ip address add dev swp3 2001:db8:2::1/64 + + ip link set dev swp1 up + tc qdisc add dev swp1 clsact + + ip link add name gt6 type ip6erspan \ + local 2001:db8:2::1 remote 2001:db8:2::2 oseq okey 123 + ip link set dev gt6 up + + sleep 1 + + tc filter add dev swp1 ingress pref 1000 matchall skip_hw \ + action mirred egress mirror dev gt6 + ping -I h1 192.0.2.2 + +Fixes: e41c7c68ea77 ("ip6erspan: make sure enough headroom at xmit.") +Signed-off-by: Petr Machata +Acked-by: William Tu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -908,7 +908,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit + truncate = true; + } + +- if (skb_cow_head(skb, dev->needed_headroom)) ++ if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen)) + goto tx_err; + + t->parms.o_flags &= ~TUNNEL_KEY; diff --git a/queue-4.16/net-ip6_gre-fix-ip6erspan-hlen-calculation.patch b/queue-4.16/net-ip6_gre-fix-ip6erspan-hlen-calculation.patch new file mode 100644 index 00000000000..2d8d2ba9d19 --- /dev/null +++ b/queue-4.16/net-ip6_gre-fix-ip6erspan-hlen-calculation.patch @@ -0,0 +1,151 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Petr Machata +Date: Thu, 17 May 2018 16:36:51 +0200 +Subject: net: ip6_gre: Fix ip6erspan hlen calculation + +From: Petr Machata + +[ Upstream commit 2d665034f239412927b1e71329f20f001c92da09 ] + +Even though ip6erspan_tap_init() sets up hlen and tun_hlen according to +what ERSPAN needs, it goes ahead to call ip6gre_tnl_link_config() which +overwrites these settings with GRE-specific ones. + +Similarly for changelink callbacks, which are handled by +ip6gre_changelink() calls ip6gre_tnl_change() calls +ip6gre_tnl_link_config() as well. + +The difference ends up being 12 vs. 20 bytes, and this is generally not +a problem, because a 12-byte request likely ends up allocating more and +the extra 8 bytes are thus available. However correct it is not. + +So replace the newlink and changelink callbacks with an ERSPAN-specific +ones, reusing the newly-introduced _common() functions. + +Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") +Signed-off-by: Petr Machata +Acked-by: William Tu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 65 insertions(+), 9 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -81,6 +81,7 @@ static int ip6gre_tunnel_init(struct net + static void ip6gre_tunnel_setup(struct net_device *dev); + static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); + static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); ++static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu); + + /* Tunnel hash table */ + +@@ -1746,6 +1747,19 @@ static const struct net_device_ops ip6gr + .ndo_get_iflink = ip6_tnl_get_iflink, + }; + ++static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel) ++{ ++ int t_hlen; ++ ++ tunnel->tun_hlen = 8; ++ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + ++ erspan_hdr_len(tunnel->parms.erspan_ver); ++ ++ t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); ++ tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; ++ return t_hlen; ++} ++ + static int ip6erspan_tap_init(struct net_device *dev) + { + struct ip6_tnl *tunnel; +@@ -1769,12 +1783,7 @@ static int ip6erspan_tap_init(struct net + return ret; + } + +- tunnel->tun_hlen = 8; +- tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + +- erspan_hdr_len(tunnel->parms.erspan_ver); +- t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); +- +- dev->hard_header_len = LL_MAX_HEADER + t_hlen; ++ t_hlen = ip6erspan_calc_hlen(tunnel); + dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; +@@ -1783,7 +1792,7 @@ static int ip6erspan_tap_init(struct net + + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + tunnel = netdev_priv(dev); +- ip6gre_tnl_link_config(tunnel, 1); ++ ip6erspan_tnl_link_config(tunnel, 1); + + return 0; + } +@@ -2108,6 +2117,53 @@ static void ip6erspan_tap_setup(struct n + netif_keep_dst(dev); + } + ++static int ip6erspan_newlink(struct net *src_net, struct net_device *dev, 
++ struct nlattr *tb[], struct nlattr *data[], ++ struct netlink_ext_ack *extack) ++{ ++ int err = ip6gre_newlink_common(src_net, dev, tb, data, extack); ++ struct ip6_tnl *nt = netdev_priv(dev); ++ struct net *net = dev_net(dev); ++ ++ if (!err) { ++ ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]); ++ ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt); ++ } ++ return err; ++} ++ ++static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu) ++{ ++ ip6gre_tnl_link_config_common(t); ++ ip6gre_tnl_link_config_route(t, set_mtu, ip6erspan_calc_hlen(t)); ++} ++ ++static int ip6erspan_tnl_change(struct ip6_tnl *t, ++ const struct __ip6_tnl_parm *p, int set_mtu) ++{ ++ ip6gre_tnl_copy_tnl_parm(t, p); ++ ip6erspan_tnl_link_config(t, set_mtu); ++ return 0; ++} ++ ++static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], ++ struct nlattr *data[], ++ struct netlink_ext_ack *extack) ++{ ++ struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); ++ struct __ip6_tnl_parm p; ++ struct ip6_tnl *t; ++ ++ t = ip6gre_changelink_common(dev, tb, data, &p, extack); ++ if (IS_ERR(t)) ++ return PTR_ERR(t); ++ ++ ip6gre_tunnel_unlink(ign, t); ++ ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]); ++ ip6gre_tunnel_link(ign, t); ++ return 0; ++} ++ + static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { + .kind = "ip6gre", + .maxtype = IFLA_GRE_MAX, +@@ -2144,8 +2200,8 @@ static struct rtnl_link_ops ip6erspan_ta + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6erspan_tap_setup, + .validate = ip6erspan_tap_validate, +- .newlink = ip6gre_newlink, +- .changelink = ip6gre_changelink, ++ .newlink = ip6erspan_newlink, ++ .changelink = ip6erspan_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, diff --git a/queue-4.16/net-ip6_gre-fix-tunnel-metadata-device-sharing.patch b/queue-4.16/net-ip6_gre-fix-tunnel-metadata-device-sharing.patch new file mode 100644 index 00000000000..4683e058296 
--- /dev/null +++ b/queue-4.16/net-ip6_gre-fix-tunnel-metadata-device-sharing.patch @@ -0,0 +1,248 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: William Tu +Date: Fri, 18 May 2018 19:22:28 -0700 +Subject: net: ip6_gre: fix tunnel metadata device sharing. + +From: William Tu + +[ Upstream commit b80d0b93b991e551a32157e0d9d38fc5bc9348a7 ] + +Currently ip6gre and ip6erspan share a single metadata mode device, +using 'collect_md_tun'. Thus, when doing: + ip link add dev ip6gre11 type ip6gretap external + ip link add dev ip6erspan12 type ip6erspan external + RTNETLINK answers: File exists +simply fails because the 2nd command tries to create the same collect_md_tun. + +The patch fixes it by adding a separate collect md tunnel device +for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple +of places need to refactor/split up in order to distinguish ip6gre +and ip6erspan. + +First, move the collect_md check at ip6gre_tunnel_{unlink,link} and +create separate functions {ip6gre,ip6erspan}_tunnel_{link_md,unlink_md}. +Then before link/unlink, make sure the link_md/unlink_md is called. +Finally, a separate ndo_uninit is created for ip6erspan. Tested it +using the samples/bpf/test_tunnel_bpf.sh. + +Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode") +Signed-off-by: William Tu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 101 +++++++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 79 insertions(+), 22 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -71,6 +71,7 @@ struct ip6gre_net { + struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; + + struct ip6_tnl __rcu *collect_md_tun; ++ struct ip6_tnl __rcu *collect_md_tun_erspan; + struct net_device *fb_tunnel_dev; + }; + +@@ -233,7 +234,12 @@ static struct ip6_tnl *ip6gre_tunnel_loo + if (cand) + return cand; + +- t = rcu_dereference(ign->collect_md_tun); ++ if (gre_proto == htons(ETH_P_ERSPAN) || ++ gre_proto == htons(ETH_P_ERSPAN2)) ++ t = rcu_dereference(ign->collect_md_tun_erspan); ++ else ++ t = rcu_dereference(ign->collect_md_tun); ++ + if (t && t->dev->flags & IFF_UP) + return t; + +@@ -262,6 +268,31 @@ static struct ip6_tnl __rcu **__ip6gre_b + return &ign->tunnels[prio][h]; + } + ++static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t) ++{ ++ if (t->parms.collect_md) ++ rcu_assign_pointer(ign->collect_md_tun, t); ++} ++ ++static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t) ++{ ++ if (t->parms.collect_md) ++ rcu_assign_pointer(ign->collect_md_tun_erspan, t); ++} ++ ++static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t) ++{ ++ if (t->parms.collect_md) ++ rcu_assign_pointer(ign->collect_md_tun, NULL); ++} ++ ++static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign, ++ struct ip6_tnl *t) ++{ ++ if (t->parms.collect_md) ++ rcu_assign_pointer(ign->collect_md_tun_erspan, NULL); ++} ++ + static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign, + const struct ip6_tnl *t) + { +@@ -272,9 +303,6 @@ static void ip6gre_tunnel_link(struct ip + { + struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); + +- if (t->parms.collect_md) +- rcu_assign_pointer(ign->collect_md_tun, t); +- + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); + 
rcu_assign_pointer(*tp, t); + } +@@ -284,9 +312,6 @@ static void ip6gre_tunnel_unlink(struct + struct ip6_tnl __rcu **tp; + struct ip6_tnl *iter; + +- if (t->parms.collect_md) +- rcu_assign_pointer(ign->collect_md_tun, NULL); +- + for (tp = ip6gre_bucket(ign, t); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { +@@ -375,11 +400,23 @@ failed_free: + return NULL; + } + ++static void ip6erspan_tunnel_uninit(struct net_device *dev) ++{ ++ struct ip6_tnl *t = netdev_priv(dev); ++ struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); ++ ++ ip6erspan_tunnel_unlink_md(ign, t); ++ ip6gre_tunnel_unlink(ign, t); ++ dst_cache_reset(&t->dst_cache); ++ dev_put(dev); ++} ++ + static void ip6gre_tunnel_uninit(struct net_device *dev) + { + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); + ++ ip6gre_tunnel_unlink_md(ign, t); + ip6gre_tunnel_unlink(ign, t); + dst_cache_reset(&t->dst_cache); + dev_put(dev); +@@ -1799,7 +1836,7 @@ static int ip6erspan_tap_init(struct net + + static const struct net_device_ops ip6erspan_netdev_ops = { + .ndo_init = ip6erspan_tap_init, +- .ndo_uninit = ip6gre_tunnel_uninit, ++ .ndo_uninit = ip6erspan_tunnel_uninit, + .ndo_start_xmit = ip6erspan_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +@@ -1862,8 +1899,6 @@ static int ip6gre_newlink_common(struct + struct netlink_ext_ack *extack) + { + struct ip6_tnl *nt; +- struct net *net = dev_net(dev); +- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct ip_tunnel_encap ipencap; + int err; + +@@ -1876,16 +1911,6 @@ static int ip6gre_newlink_common(struct + return err; + } + +- ip6gre_netlink_parms(data, &nt->parms); +- +- if (nt->parms.collect_md) { +- if (rtnl_dereference(ign->collect_md_tun)) +- return -EEXIST; +- } else { +- if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) +- return -EEXIST; +- } +- + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + 
eth_hw_addr_random(dev); + +@@ -1909,12 +1934,26 @@ static int ip6gre_newlink(struct net *sr + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) + { +- int err = ip6gre_newlink_common(src_net, dev, tb, data, extack); + struct ip6_tnl *nt = netdev_priv(dev); + struct net *net = dev_net(dev); ++ struct ip6gre_net *ign; ++ int err; ++ ++ ip6gre_netlink_parms(data, &nt->parms); ++ ign = net_generic(net, ip6gre_net_id); ++ ++ if (nt->parms.collect_md) { ++ if (rtnl_dereference(ign->collect_md_tun)) ++ return -EEXIST; ++ } else { ++ if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) ++ return -EEXIST; ++ } + ++ err = ip6gre_newlink_common(src_net, dev, tb, data, extack); + if (!err) { + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); ++ ip6gre_tunnel_link_md(ign, nt); + ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt); + } + return err; +@@ -1966,8 +2005,10 @@ static int ip6gre_changelink(struct net_ + if (IS_ERR(t)) + return PTR_ERR(t); + ++ ip6gre_tunnel_unlink_md(ign, t); + ip6gre_tunnel_unlink(ign, t); + ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); ++ ip6gre_tunnel_link_md(ign, t); + ip6gre_tunnel_link(ign, t); + return 0; + } +@@ -2121,12 +2162,26 @@ static int ip6erspan_newlink(struct net + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) + { +- int err = ip6gre_newlink_common(src_net, dev, tb, data, extack); + struct ip6_tnl *nt = netdev_priv(dev); + struct net *net = dev_net(dev); ++ struct ip6gre_net *ign; ++ int err; ++ ++ ip6gre_netlink_parms(data, &nt->parms); ++ ign = net_generic(net, ip6gre_net_id); ++ ++ if (nt->parms.collect_md) { ++ if (rtnl_dereference(ign->collect_md_tun_erspan)) ++ return -EEXIST; ++ } else { ++ if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) ++ return -EEXIST; ++ } + ++ err = ip6gre_newlink_common(src_net, dev, tb, data, extack); + if (!err) { + ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]); ++ ip6erspan_tunnel_link_md(ign, nt); + ip6gre_tunnel_link(net_generic(net, 
ip6gre_net_id), nt); + } + return err; +@@ -2158,8 +2213,10 @@ static int ip6erspan_changelink(struct n + if (IS_ERR(t)) + return PTR_ERR(t); + ++ ip6gre_tunnel_unlink_md(ign, t); + ip6gre_tunnel_unlink(ign, t); + ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]); ++ ip6erspan_tunnel_link_md(ign, t); + ip6gre_tunnel_link(ign, t); + return 0; + } diff --git a/queue-4.16/net-ip6_gre-request-headroom-in-__gre6_xmit.patch b/queue-4.16/net-ip6_gre-request-headroom-in-__gre6_xmit.patch new file mode 100644 index 00000000000..80c87ec83eb --- /dev/null +++ b/queue-4.16/net-ip6_gre-request-headroom-in-__gre6_xmit.patch @@ -0,0 +1,155 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Petr Machata +Date: Thu, 17 May 2018 16:36:10 +0200 +Subject: net: ip6_gre: Request headroom in __gre6_xmit() + +From: Petr Machata + +[ Upstream commit 01b8d064d58b4c1f0eff47f8fe8a8508cb3b3840 ] + +__gre6_xmit() pushes GRE headers before handing over to ip6_tnl_xmit() +for generic IP-in-IP processing. However it doesn't make sure that there +is enough headroom to push the header to. That can lead to the panic +cited below. (Reproducer below that). + +Fix by requesting either needed_headroom if already primed, or just the +bare minimum needed for the header otherwise. + +[ 158.576725] kernel BUG at net/core/skbuff.c:104! +[ 158.581510] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI +[ 158.587174] Modules linked in: act_mirred cls_matchall ip6_gre ip6_tunnel tunnel6 gre sch_ingress vrf veth x86_pkg_temp_thermal mlx_platform nfsd e1000e leds_mlxcpld +[ 158.602268] CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 4.17.0-rc4-net_master-custom-139 #10 +[ 158.610938] Hardware name: Mellanox Technologies Ltd. 
"MSN2410-CB2F"/"SA000874", BIOS 4.6.5 03/08/2016 +[ 158.620426] RIP: 0010:skb_panic+0xc3/0x100 +[ 158.624586] RSP: 0018:ffff8801d3f27110 EFLAGS: 00010286 +[ 158.629882] RAX: 0000000000000082 RBX: ffff8801c02cc040 RCX: 0000000000000000 +[ 158.637127] RDX: 0000000000000082 RSI: dffffc0000000000 RDI: ffffed003a7e4e18 +[ 158.644366] RBP: ffff8801bfec8020 R08: ffffed003aabce19 R09: ffffed003aabce19 +[ 158.651574] R10: 000000000000000b R11: ffffed003aabce18 R12: ffff8801c364de66 +[ 158.658786] R13: 000000000000002c R14: 00000000000000c0 R15: ffff8801c364de68 +[ 158.666007] FS: 0000000000000000(0000) GS:ffff8801d5400000(0000) knlGS:0000000000000000 +[ 158.674212] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 158.680036] CR2: 00007f4b3702dcd0 CR3: 0000000003228002 CR4: 00000000001606e0 +[ 158.687228] Call Trace: +[ 158.689752] ? __gre6_xmit+0x246/0xd80 [ip6_gre] +[ 158.694475] ? __gre6_xmit+0x246/0xd80 [ip6_gre] +[ 158.699141] skb_push+0x78/0x90 +[ 158.702344] __gre6_xmit+0x246/0xd80 [ip6_gre] +[ 158.706872] ip6gre_tunnel_xmit+0x3bc/0x610 [ip6_gre] +[ 158.711992] ? __gre6_xmit+0xd80/0xd80 [ip6_gre] +[ 158.716668] ? debug_check_no_locks_freed+0x210/0x210 +[ 158.721761] ? print_irqtrace_events+0x120/0x120 +[ 158.726461] ? sched_clock_cpu+0x18/0x210 +[ 158.730572] ? sched_clock_cpu+0x18/0x210 +[ 158.734692] ? cyc2ns_read_end+0x10/0x10 +[ 158.738705] ? skb_network_protocol+0x76/0x200 +[ 158.743216] ? netif_skb_features+0x1b2/0x550 +[ 158.747648] dev_hard_start_xmit+0x137/0x770 +[ 158.752010] sch_direct_xmit+0x2ef/0x5d0 +[ 158.755992] ? pfifo_fast_dequeue+0x3fa/0x670 +[ 158.760460] ? pfifo_fast_change_tx_queue_len+0x810/0x810 +[ 158.765975] ? __lock_is_held+0xa0/0x160 +[ 158.770002] __qdisc_run+0x39e/0xfc0 +[ 158.773673] ? _raw_spin_unlock+0x29/0x40 +[ 158.777781] ? pfifo_fast_enqueue+0x24b/0x3e0 +[ 158.782191] ? sch_direct_xmit+0x5d0/0x5d0 +[ 158.786372] ? pfifo_fast_dequeue+0x670/0x670 +[ 158.790818] ? __dev_queue_xmit+0x172/0x1770 +[ 158.795195] ? 
preempt_count_sub+0xf/0xd0 +[ 158.799313] __dev_queue_xmit+0x410/0x1770 +[ 158.803512] ? ___slab_alloc+0x605/0x930 +[ 158.807525] ? ___slab_alloc+0x605/0x930 +[ 158.811540] ? memcpy+0x34/0x50 +[ 158.814768] ? netdev_pick_tx+0x1c0/0x1c0 +[ 158.818895] ? __skb_clone+0x2fd/0x3d0 +[ 158.822712] ? __copy_skb_header+0x270/0x270 +[ 158.827079] ? rcu_read_lock_sched_held+0x93/0xa0 +[ 158.831903] ? kmem_cache_alloc+0x344/0x4d0 +[ 158.836199] ? skb_clone+0x123/0x230 +[ 158.839869] ? skb_split+0x820/0x820 +[ 158.843521] ? tcf_mirred+0x554/0x930 [act_mirred] +[ 158.848407] tcf_mirred+0x554/0x930 [act_mirred] +[ 158.853104] ? tcf_mirred_act_wants_ingress.part.2+0x10/0x10 [act_mirred] +[ 158.860005] ? __lock_acquire+0x706/0x26e0 +[ 158.864162] ? mark_lock+0x13d/0xb40 +[ 158.867832] tcf_action_exec+0xcf/0x2a0 +[ 158.871736] tcf_classify+0xfa/0x340 +[ 158.875402] __netif_receive_skb_core+0x8e1/0x1c60 +[ 158.880334] ? nf_ingress+0x500/0x500 +[ 158.884059] ? process_backlog+0x347/0x4b0 +[ 158.888241] ? lock_acquire+0xd8/0x320 +[ 158.892050] ? process_backlog+0x1b6/0x4b0 +[ 158.896228] ? process_backlog+0xc2/0x4b0 +[ 158.900291] process_backlog+0xc2/0x4b0 +[ 158.904210] net_rx_action+0x5cc/0x980 +[ 158.908047] ? napi_complete_done+0x2c0/0x2c0 +[ 158.912525] ? rcu_read_unlock+0x80/0x80 +[ 158.916534] ? __lock_is_held+0x34/0x160 +[ 158.920541] __do_softirq+0x1d4/0x9d2 +[ 158.924308] ? trace_event_raw_event_irq_handler_exit+0x140/0x140 +[ 158.930515] run_ksoftirqd+0x1d/0x40 +[ 158.934152] smpboot_thread_fn+0x32b/0x690 +[ 158.938299] ? sort_range+0x20/0x20 +[ 158.941842] ? preempt_count_sub+0xf/0xd0 +[ 158.945940] ? schedule+0x5b/0x140 +[ 158.949412] kthread+0x206/0x300 +[ 158.952689] ? sort_range+0x20/0x20 +[ 158.956249] ? 
kthread_stop+0x570/0x570 +[ 158.960164] ret_from_fork+0x3a/0x50 +[ 158.963823] Code: 14 3e ff 8b 4b 78 55 4d 89 f9 41 56 41 55 48 c7 c7 a0 cf db 82 41 54 44 8b 44 24 2c 48 8b 54 24 30 48 8b 74 24 20 e8 16 94 13 ff <0f> 0b 48 c7 c7 60 8e 1f 85 48 83 c4 20 e8 55 ef a6 ff 89 74 24 +[ 158.983235] RIP: skb_panic+0xc3/0x100 RSP: ffff8801d3f27110 +[ 158.988935] ---[ end trace 5af56ee845aa6cc8 ]--- +[ 158.993641] Kernel panic - not syncing: Fatal exception in interrupt +[ 159.000176] Kernel Offset: disabled +[ 159.003767] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- + +Reproducer: + + ip link add h1 type veth peer name swp1 + ip link add h3 type veth peer name swp3 + + ip link set dev h1 up + ip address add 192.0.2.1/28 dev h1 + + ip link add dev vh3 type vrf table 20 + ip link set dev h3 master vh3 + ip link set dev vh3 up + ip link set dev h3 up + + ip link set dev swp3 up + ip address add dev swp3 2001:db8:2::1/64 + + ip link set dev swp1 up + tc qdisc add dev swp1 clsact + + ip link add name gt6 type ip6gretap \ + local 2001:db8:2::1 remote 2001:db8:2::2 + ip link set dev gt6 up + + sleep 1 + + tc filter add dev swp1 ingress pref 1000 matchall skip_hw \ + action mirred egress mirror dev gt6 + ping -I h1 192.0.2.2 + +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Signed-off-by: Petr Machata +Acked-by: William Tu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -701,6 +701,9 @@ static netdev_tx_t __gre6_xmit(struct sk + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; + ++ if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) ++ return -ENOMEM; ++ + /* Push GRE header. */ + protocol = (dev->type == ARPHRD_ETHER) ? 
htons(ETH_P_TEB) : proto; + diff --git a/queue-4.16/net-ip6_gre-split-up-ip6gre_changelink.patch b/queue-4.16/net-ip6_gre-split-up-ip6gre_changelink.patch new file mode 100644 index 00000000000..6fc6da61975 --- /dev/null +++ b/queue-4.16/net-ip6_gre-split-up-ip6gre_changelink.patch @@ -0,0 +1,87 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Petr Machata +Date: Thu, 17 May 2018 16:36:45 +0200 +Subject: net: ip6_gre: Split up ip6gre_changelink() + +From: Petr Machata + +[ Upstream commit c8632fc30bb03aa0c3bd7bcce85355a10feb8149 ] + +Extract from ip6gre_changelink() a reusable function +ip6gre_changelink_common(). This will allow introduction of +ERSPAN-specific _changelink() function with not a lot of code +duplication. + +Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") +Signed-off-by: Petr Machata +Acked-by: William Tu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 33 ++++++++++++++++++++++++--------- + 1 file changed, 24 insertions(+), 9 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -1911,37 +1911,52 @@ static int ip6gre_newlink(struct net *sr + return err; + } + +-static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], +- struct nlattr *data[], +- struct netlink_ext_ack *extack) ++static struct ip6_tnl * ++ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[], ++ struct nlattr *data[], struct __ip6_tnl_parm *p_p, ++ struct netlink_ext_ack *extack) + { + struct ip6_tnl *t, *nt = netdev_priv(dev); + struct net *net = nt->net; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); +- struct __ip6_tnl_parm p; + struct ip_tunnel_encap ipencap; + + if (dev == ign->fb_tunnel_dev) +- return -EINVAL; ++ return ERR_PTR(-EINVAL); + + if (ip6gre_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) +- return err; ++ return ERR_PTR(err); + } + +- ip6gre_netlink_parms(data, &p); ++ 
ip6gre_netlink_parms(data, p_p); + +- t = ip6gre_tunnel_locate(net, &p, 0); ++ t = ip6gre_tunnel_locate(net, p_p, 0); + + if (t) { + if (t->dev != dev) +- return -EEXIST; ++ return ERR_PTR(-EEXIST); + } else { + t = nt; + } + ++ return t; ++} ++ ++static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], ++ struct nlattr *data[], ++ struct netlink_ext_ack *extack) ++{ ++ struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); ++ struct __ip6_tnl_parm p; ++ struct ip6_tnl *t; ++ ++ t = ip6gre_changelink_common(dev, tb, data, &p, extack); ++ if (IS_ERR(t)) ++ return PTR_ERR(t); ++ + ip6gre_tunnel_unlink(ign, t); + ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link(ign, t); diff --git a/queue-4.16/net-ip6_gre-split-up-ip6gre_newlink.patch b/queue-4.16/net-ip6_gre-split-up-ip6gre_newlink.patch new file mode 100644 index 00000000000..d6010d93da1 --- /dev/null +++ b/queue-4.16/net-ip6_gre-split-up-ip6gre_newlink.patch @@ -0,0 +1,74 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Petr Machata +Date: Thu, 17 May 2018 16:36:39 +0200 +Subject: net: ip6_gre: Split up ip6gre_newlink() + +From: Petr Machata + +[ Upstream commit 7fa38a7c852ec99e3a7fc375eb2c21c50c2e46b8 ] + +Extract from ip6gre_newlink() a reusable function +ip6gre_newlink_common(). The ip6gre_tnl_link_config() call needs to be +made customizable for ERSPAN, thus reorder it with calls to +ip6_tnl_change_mtu() and dev_hold(), and extract the whole tail to the +caller, ip6gre_newlink(). Thus enable an ERSPAN-specific _newlink() +function without a lot of duplication. + +Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") +Signed-off-by: Petr Machata +Acked-by: William Tu +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 24 ++++++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -1848,9 +1848,9 @@ static bool ip6gre_netlink_encap_parms(s + return ret; + } + +-static int ip6gre_newlink(struct net *src_net, struct net_device *dev, +- struct nlattr *tb[], struct nlattr *data[], +- struct netlink_ext_ack *extack) ++static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev, ++ struct nlattr *tb[], struct nlattr *data[], ++ struct netlink_ext_ack *extack) + { + struct ip6_tnl *nt; + struct net *net = dev_net(dev); +@@ -1887,18 +1887,30 @@ static int ip6gre_newlink(struct net *sr + if (err) + goto out; + +- ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); +- + if (tb[IFLA_MTU]) + ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + + dev_hold(dev); +- ip6gre_tunnel_link(ign, nt); + + out: + return err; + } + ++static int ip6gre_newlink(struct net *src_net, struct net_device *dev, ++ struct nlattr *tb[], struct nlattr *data[], ++ struct netlink_ext_ack *extack) ++{ ++ int err = ip6gre_newlink_common(src_net, dev, tb, data, extack); ++ struct ip6_tnl *nt = netdev_priv(dev); ++ struct net *net = dev_net(dev); ++ ++ if (!err) { ++ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); ++ ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt); ++ } ++ return err; ++} ++ + static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) diff --git a/queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_change.patch b/queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_change.patch new file mode 100644 index 00000000000..0d07dbc6b58 --- /dev/null +++ b/queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_change.patch @@ -0,0 +1,48 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Petr Machata +Date: Thu, 17 May 2018 16:36:33 +0200 +Subject: net: ip6_gre: Split up ip6gre_tnl_change() + +From: Petr Machata + 
+[ Upstream commit a6465350ef495f5cbd76a3e505d25a01d648477e ] + +Split a reusable function ip6gre_tnl_copy_tnl_parm() from +ip6gre_tnl_change(). This will allow ERSPAN-specific code to +reuse the common parts while customizing the behavior for ERSPAN. + +Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") +Signed-off-by: Petr Machata +Acked-by: William Tu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -1106,8 +1106,8 @@ static void ip6gre_tnl_link_config(struc + ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t)); + } + +-static int ip6gre_tnl_change(struct ip6_tnl *t, +- const struct __ip6_tnl_parm *p, int set_mtu) ++static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t, ++ const struct __ip6_tnl_parm *p) + { + t->parms.laddr = p->laddr; + t->parms.raddr = p->raddr; +@@ -1123,6 +1123,12 @@ static int ip6gre_tnl_change(struct ip6_ + t->parms.o_flags = p->o_flags; + t->parms.fwmark = p->fwmark; + dst_cache_reset(&t->dst_cache); ++} ++ ++static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p, ++ int set_mtu) ++{ ++ ip6gre_tnl_copy_tnl_parm(t, p); + ip6gre_tnl_link_config(t, set_mtu); + return 0; + } diff --git a/queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_link_config.patch b/queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_link_config.patch new file mode 100644 index 00000000000..a32525c56b6 --- /dev/null +++ b/queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_link_config.patch @@ -0,0 +1,98 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Petr Machata +Date: Thu, 17 May 2018 16:36:27 +0200 +Subject: net: ip6_gre: Split up ip6gre_tnl_link_config() + +From: Petr Machata + +[ Upstream commit a483373ead61e6079bc8ebe27e2dfdb2e3c1559f ] + +The function ip6gre_tnl_link_config() is used for setting up +configuration of both ip6gretap and ip6erspan tunnels. 
Split the +function into the common part and the route-lookup part. The latter then +takes the calculated header length as an argument. This split will allow +the patches down the line to sneak in a custom header length computation +for the ERSPAN tunnel. + +Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") +Signed-off-by: Petr Machata +Acked-by: William Tu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 38 ++++++++++++++++++++++++++------------ + 1 file changed, 26 insertions(+), 12 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -1019,12 +1019,11 @@ tx_err: + return NETDEV_TX_OK; + } + +-static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) ++static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) + { + struct net_device *dev = t->dev; + struct __ip6_tnl_parm *p = &t->parms; + struct flowi6 *fl6 = &t->fl.u.ip6; +- int t_hlen; + + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); +@@ -1051,12 +1050,13 @@ static void ip6gre_tnl_link_config(struc + dev->flags |= IFF_POINTOPOINT; + else + dev->flags &= ~IFF_POINTOPOINT; ++} + +- t->tun_hlen = gre_calc_hlen(t->parms.o_flags); +- +- t->hlen = t->encap_hlen + t->tun_hlen; +- +- t_hlen = t->hlen + sizeof(struct ipv6hdr); ++static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, ++ int t_hlen) ++{ ++ const struct __ip6_tnl_parm *p = &t->parms; ++ struct net_device *dev = t->dev; + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + int strict = (ipv6_addr_type(&p->raddr) & +@@ -1088,6 +1088,24 @@ static void ip6gre_tnl_link_config(struc + } + } + ++static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) ++{ ++ int t_hlen; ++ ++ tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); ++ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; ++ ++ t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); ++ tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; ++ return t_hlen; ++} ++ 
++static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) ++{ ++ ip6gre_tnl_link_config_common(t); ++ ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t)); ++} ++ + static int ip6gre_tnl_change(struct ip6_tnl *t, + const struct __ip6_tnl_parm *p, int set_mtu) + { +@@ -1381,11 +1399,7 @@ static int ip6gre_tunnel_init_common(str + return ret; + } + +- tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); +- tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; +- t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); +- +- dev->hard_header_len = LL_MAX_HEADER + t_hlen; ++ t_hlen = ip6gre_calc_hlen(tunnel); + dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; diff --git a/queue-4.16/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch b/queue-4.16/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch new file mode 100644 index 00000000000..fb87ec5c252 --- /dev/null +++ b/queue-4.16/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch @@ -0,0 +1,48 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Tarick Bedeir +Date: Sun, 13 May 2018 16:38:45 -0700 +Subject: net/mlx4_core: Fix error handling in mlx4_init_port_info. + +From: Tarick Bedeir + +[ Upstream commit 57f6f99fdad9984801cde05c1db68fe39b474a10 ] + +Avoid exiting the function with a lingering sysfs file (if the first +call to device_create_file() fails while the second succeeds), and avoid +calling devlink_port_unregister() twice. + +In other words, either mlx4_init_port_info() succeeds and returns zero, or +it fails, returns non-zero, and requires no cleanup. + +Fixes: 096335b3f983 ("mlx4_core: Allow dynamic MTU configuration for IB ports") +Signed-off-by: Tarick Bedeir +Reviewed-by: Leon Romanovsky +Reviewed-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/main.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/main.c ++++ b/drivers/net/ethernet/mellanox/mlx4/main.c +@@ -3007,6 +3007,7 @@ static int mlx4_init_port_info(struct ml + mlx4_err(dev, "Failed to create file for port %d\n", port); + devlink_port_unregister(&info->devlink_port); + info->port = -1; ++ return err; + } + + sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); +@@ -3028,9 +3029,10 @@ static int mlx4_init_port_info(struct ml + &info->port_attr); + devlink_port_unregister(&info->devlink_port); + info->port = -1; ++ return err; + } + +- return err; ++ return 0; + } + + static void mlx4_cleanup_port_info(struct mlx4_port_info *info) diff --git a/queue-4.16/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch b/queue-4.16/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch new file mode 100644 index 00000000000..141c7f0061a --- /dev/null +++ b/queue-4.16/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch @@ -0,0 +1,34 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Davide Caratti +Date: Wed, 16 May 2018 12:54:29 +0200 +Subject: net/sched: fix refcnt leak in the error path of tcf_vlan_init() + +From: Davide Caratti + +[ Upstream commit 5a4931ae0193f8a4a97e8260fd0df1d705d83299 ] + +Similarly to what was done with commit a52956dfc503 ("net sched actions: +fix refcnt leak in skbmod"), fix the error path of tcf_vlan_init() to avoid +refcnt leaks when wrong value of TCA_VLAN_PUSH_VLAN_PROTOCOL is given. + +Fixes: 5026c9b1bafc ("net sched: vlan action fix late binding") +CC: Roman Mashak +Signed-off-by: Davide Caratti +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_vlan.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/sched/act_vlan.c ++++ b/net/sched/act_vlan.c +@@ -161,6 +161,8 @@ static int tcf_vlan_init(struct net *net + case htons(ETH_P_8021AD): + break; + default: ++ if (exists) ++ tcf_idr_release(*a, bind); + return -EPROTONOSUPPORT; + } + } else { diff --git a/queue-4.16/net-sched-red-avoid-hashing-null-child.patch b/queue-4.16/net-sched-red-avoid-hashing-null-child.patch new file mode 100644 index 00000000000..d748178386f --- /dev/null +++ b/queue-4.16/net-sched-red-avoid-hashing-null-child.patch @@ -0,0 +1,108 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Paolo Abeni +Date: Fri, 18 May 2018 14:51:44 +0200 +Subject: net: sched: red: avoid hashing NULL child + +From: Paolo Abeni + +[ Upstream commit 44a63b137f7b6e4c7bd6c9cc21615941cb36509d ] + +Hangbin reported an Oops triggered by the syzkaller qdisc rules: + + kasan: GPF could be caused by NULL-ptr deref or user memory access + general protection fault: 0000 [#1] SMP KASAN PTI + Modules linked in: sch_red + CPU: 0 PID: 28699 Comm: syz-executor5 Not tainted 4.17.0-rc4.kcov #1 + Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 + RIP: 0010:qdisc_hash_add+0x26/0xa0 + RSP: 0018:ffff8800589cf470 EFLAGS: 00010203 + RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff824ad971 + RDX: 0000000000000007 RSI: ffffc9000ce9f000 RDI: 000000000000003c + RBP: 0000000000000001 R08: ffffed000b139ea2 R09: ffff8800589cf4f0 + R10: ffff8800589cf50f R11: ffffed000b139ea2 R12: ffff880054019fc0 + R13: ffff880054019fb4 R14: ffff88005c0af600 R15: ffff880054019fb0 + FS: 00007fa6edcb1700(0000) GS:ffff88005ce00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000020000740 CR3: 000000000fc16000 CR4: 00000000000006f0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + 
red_change+0x2d2/0xed0 [sch_red] + qdisc_create+0x57e/0xef0 + tc_modify_qdisc+0x47f/0x14e0 + rtnetlink_rcv_msg+0x6a8/0x920 + netlink_rcv_skb+0x2a2/0x3c0 + netlink_unicast+0x511/0x740 + netlink_sendmsg+0x825/0xc30 + sock_sendmsg+0xc5/0x100 + ___sys_sendmsg+0x778/0x8e0 + __sys_sendmsg+0xf5/0x1b0 + do_syscall_64+0xbd/0x3b0 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + RIP: 0033:0x450869 + RSP: 002b:00007fa6edcb0c48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e + RAX: ffffffffffffffda RBX: 00007fa6edcb16b4 RCX: 0000000000450869 + RDX: 0000000000000000 RSI: 00000000200000c0 RDI: 0000000000000013 + RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff + R13: 0000000000008778 R14: 0000000000702838 R15: 00007fa6edcb1700 + Code: e9 0b fe ff ff 0f 1f 44 00 00 55 53 48 89 fb 89 f5 e8 3f 07 f3 fe 48 8d 7b 3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 51 + RIP: qdisc_hash_add+0x26/0xa0 RSP: ffff8800589cf470 + +When a red qdisc is updated with a 0 limit, the child qdisc is left +unmodified, no additional scheduler is created in red_change(), +the 'child' local variable is rightfully NULL and must not be added +to the hash table. + +This change addresses the above issue by moving qdisc_hash_add() right +after the child qdisc creation. It additionally removes unneeded checks +for noop_qdisc. + +Reported-by: Hangbin Liu +Fixes: 49b499718fa1 ("net: sched: make default fifo qdiscs appear in the dump") +Signed-off-by: Paolo Abeni +Acked-by: Jiri Kosina +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_red.c | 5 +++-- + net/sched/sch_tbf.c | 5 +++-- + 2 files changed, 6 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_red.c ++++ b/net/sched/sch_red.c +@@ -222,10 +222,11 @@ static int red_change(struct Qdisc *sch, + extack); + if (IS_ERR(child)) + return PTR_ERR(child); +- } + +- if (child != &noop_qdisc) ++ /* child is fifo, no need to check for noop_qdisc */ + qdisc_hash_add(child, true); ++ } ++ + sch_tree_lock(sch); + q->flags = ctl->flags; + q->limit = ctl->limit; +--- a/net/sched/sch_tbf.c ++++ b/net/sched/sch_tbf.c +@@ -383,6 +383,9 @@ static int tbf_change(struct Qdisc *sch, + err = PTR_ERR(child); + goto done; + } ++ ++ /* child is fifo, no need to check for noop_qdisc */ ++ qdisc_hash_add(child, true); + } + + sch_tree_lock(sch); +@@ -391,8 +394,6 @@ static int tbf_change(struct Qdisc *sch, + q->qdisc->qstats.backlog); + qdisc_destroy(q->qdisc); + q->qdisc = child; +- if (child != &noop_qdisc) +- qdisc_hash_add(child, true); + } + q->limit = qopt->limit; + if (tb[TCA_TBF_PBURST]) diff --git a/queue-4.16/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch b/queue-4.16/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch new file mode 100644 index 00000000000..43b051cc1c3 --- /dev/null +++ b/queue-4.16/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch @@ -0,0 +1,132 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Eric Biggers +Date: Sun, 13 May 2018 17:01:30 -0700 +Subject: net/smc: check for missing nlattrs in SMC_PNETID messages + +From: Eric Biggers + +[ Upstream commit d49baa7e12ee70c0a7b821d088a770c94c02e494 ] + +It's possible to crash the kernel in several different ways by sending +messages to the SMC_PNETID generic netlink family that are missing the +expected attributes: + +- Missing SMC_PNETID_NAME => null pointer dereference when comparing + names. +- Missing SMC_PNETID_ETHNAME => null pointer dereference accessing + smc_pnetentry::ndev. 
+- Missing SMC_PNETID_IBNAME => null pointer dereference accessing + smc_pnetentry::smcibdev. +- Missing SMC_PNETID_IBPORT => out of bounds array access to + smc_ib_device::pattr[-1]. + +Fix it by validating that all expected attributes are present and that +SMC_PNETID_IBPORT is nonzero. + +Reported-by: syzbot+5cd61039dc9b8bfa6e47@syzkaller.appspotmail.com +Fixes: 6812baabf24d ("smc: establish pnet table management") +Cc: # v4.11+ +Signed-off-by: Eric Biggers +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/smc_pnet.c | 71 +++++++++++++++++++++++++++++------------------------ + 1 file changed, 40 insertions(+), 31 deletions(-) + +--- a/net/smc/smc_pnet.c ++++ b/net/smc/smc_pnet.c +@@ -245,40 +245,45 @@ out: + static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, + struct nlattr *tb[]) + { +- char *string, *ibname = NULL; +- int rc = 0; ++ char *string, *ibname; ++ int rc; + + memset(pnetelem, 0, sizeof(*pnetelem)); + INIT_LIST_HEAD(&pnetelem->list); +- if (tb[SMC_PNETID_NAME]) { +- string = (char *)nla_data(tb[SMC_PNETID_NAME]); +- if (!smc_pnetid_valid(string, pnetelem->pnet_name)) { +- rc = -EINVAL; +- goto error; +- } +- } +- if (tb[SMC_PNETID_ETHNAME]) { +- string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); +- pnetelem->ndev = dev_get_by_name(net, string); +- if (!pnetelem->ndev) +- return -ENOENT; +- } +- if (tb[SMC_PNETID_IBNAME]) { +- ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); +- ibname = strim(ibname); +- pnetelem->smcibdev = smc_pnet_find_ib(ibname); +- if (!pnetelem->smcibdev) { +- rc = -ENOENT; +- goto error; +- } +- } +- if (tb[SMC_PNETID_IBPORT]) { +- pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); +- if (pnetelem->ib_port > SMC_MAX_PORTS) { +- rc = -EINVAL; +- goto error; +- } +- } ++ ++ rc = -EINVAL; ++ if (!tb[SMC_PNETID_NAME]) ++ goto error; ++ string = (char *)nla_data(tb[SMC_PNETID_NAME]); ++ if (!smc_pnetid_valid(string, pnetelem->pnet_name)) ++ goto error; ++ ++ rc = 
-EINVAL; ++ if (!tb[SMC_PNETID_ETHNAME]) ++ goto error; ++ rc = -ENOENT; ++ string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); ++ pnetelem->ndev = dev_get_by_name(net, string); ++ if (!pnetelem->ndev) ++ goto error; ++ ++ rc = -EINVAL; ++ if (!tb[SMC_PNETID_IBNAME]) ++ goto error; ++ rc = -ENOENT; ++ ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); ++ ibname = strim(ibname); ++ pnetelem->smcibdev = smc_pnet_find_ib(ibname); ++ if (!pnetelem->smcibdev) ++ goto error; ++ ++ rc = -EINVAL; ++ if (!tb[SMC_PNETID_IBPORT]) ++ goto error; ++ pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); ++ if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS) ++ goto error; ++ + return 0; + + error: +@@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff * + void *hdr; + int rc; + ++ if (!info->attrs[SMC_PNETID_NAME]) ++ return -EINVAL; + pnetelem = smc_pnet_find_pnetid( + (char *)nla_data(info->attrs[SMC_PNETID_NAME])); + if (!pnetelem) +@@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff * + + static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) + { ++ if (!info->attrs[SMC_PNETID_NAME]) ++ return -EINVAL; + return smc_pnet_remove_by_pnetid( + (char *)nla_data(info->attrs[SMC_PNETID_NAME])); + } diff --git a/queue-4.16/net-test-tailroom-before-appending-to-linear-skb.patch b/queue-4.16/net-test-tailroom-before-appending-to-linear-skb.patch new file mode 100644 index 00000000000..3964231ac4a --- /dev/null +++ b/queue-4.16/net-test-tailroom-before-appending-to-linear-skb.patch @@ -0,0 +1,54 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Willem de Bruijn +Date: Thu, 17 May 2018 13:13:29 -0400 +Subject: net: test tailroom before appending to linear skb + +From: Willem de Bruijn + +[ Upstream commit 113f99c3358564a0647d444c2ae34e8b1abfd5b9 ] + +Device features may change during transmission. In particular with +corking, a device may toggle scatter-gather in between allocating +and writing to an skb. 
+ +Do not unconditionally assume that !NETIF_F_SG at write time implies +that the same held at alloc time and thus the skb has sufficient +tailroom. + +This issue predates git history. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Eric Dumazet +Signed-off-by: Willem de Bruijn +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_output.c | 3 ++- + net/ipv6/ip6_output.c | 3 ++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -1040,7 +1040,8 @@ alloc_new_skb: + if (copy > length) + copy = length; + +- if (!(rt->dst.dev->features&NETIF_F_SG)) { ++ if (!(rt->dst.dev->features&NETIF_F_SG) && ++ skb_tailroom(skb) >= copy) { + unsigned int off; + + off = skb->len; +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1489,7 +1489,8 @@ alloc_new_skb: + if (copy > length) + copy = length; + +- if (!(rt->dst.dev->features&NETIF_F_SG)) { ++ if (!(rt->dst.dev->features&NETIF_F_SG) && ++ skb_tailroom(skb) >= copy) { + unsigned int off; + + off = skb->len; diff --git a/queue-4.16/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch b/queue-4.16/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch new file mode 100644 index 00000000000..cead00a733b --- /dev/null +++ b/queue-4.16/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch @@ -0,0 +1,56 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Willem de Bruijn +Date: Fri, 11 May 2018 13:24:25 -0400 +Subject: packet: in packet_snd start writing at link layer allocation + +From: Willem de Bruijn + +[ Upstream commit b84bbaf7a6c8cca24f8acf25a2c8e46913a947ba ] + +Packet sockets allow construction of packets shorter than +dev->hard_header_len to accommodate protocols with variable length +link layer headers. These packets are padded to dev->hard_header_len, +because some device drivers interpret that as a minimum packet size. 
+ +packet_snd reserves dev->hard_header_len bytes on allocation. +SOCK_DGRAM sockets call skb_push in dev_hard_header() to ensure that +link layer headers are stored in the reserved range. SOCK_RAW sockets +do the same in tpacket_snd, but not in packet_snd. + +Syzbot was able to send a zero byte packet to a device with massive +116B link layer header, causing padding to cross over into skb_shinfo. +Fix this by writing from the start of the llheader reserved range also +in the case of packet_snd/SOCK_RAW. + +Update skb_set_network_header to the new offset. This also corrects +it for SOCK_DGRAM, where it incorrectly double counted reserve due to +the skb_push in dev_hard_header. + +Fixes: 9ed988cd5915 ("packet: validate variable length ll headers") +Reported-by: syzbot+71d74a5406d02057d559@syzkaller.appspotmail.com +Signed-off-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2903,13 +2903,15 @@ static int packet_snd(struct socket *soc + if (skb == NULL) + goto out_unlock; + +- skb_set_network_header(skb, reserve); ++ skb_reset_network_header(skb); + + err = -EINVAL; + if (sock->type == SOCK_DGRAM) { + offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); + if (unlikely(offset < 0)) + goto out_free; ++ } else if (reserve) { ++ skb_push(skb, reserve); + } + + /* Returns -EFAULT on error */ diff --git a/queue-4.16/qed-fix-ll2-race-during-connection-terminate.patch b/queue-4.16/qed-fix-ll2-race-during-connection-terminate.patch new file mode 100644 index 00000000000..b979d507505 --- /dev/null +++ b/queue-4.16/qed-fix-ll2-race-during-connection-terminate.patch @@ -0,0 +1,90 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Michal Kalderon +Date: Wed, 16 May 2018 14:44:40 +0300 +Subject: qed: Fix LL2 race during connection terminate + +From: Michal Kalderon + +[ 
Upstream commit 490068deaef0c76e47bf89c457de899b7d3995c7 ] + +Stress on qedi/qedr load/unload led to list_del corruption. +This is due to ll2 connection terminate freeing resources without +verifying that no more ll2 processing will occur. + +This patch unregisters the ll2 status block before terminating +the connection to ensure this race does not occur. + +Fixes: 1d6cff4fca4366 ("qed: Add iSCSI out of order packet handling") +Signed-off-by: Ariel Elior +Signed-off-by: Michal Kalderon +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_ll2.c | 24 ++++++++++++++---------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c +@@ -842,6 +842,9 @@ static int qed_ll2_lb_rxq_completion(str + struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie; + int rc; + ++ if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) ++ return 0; ++ + rc = qed_ll2_lb_rxq_handler(p_hwfn, p_ll2_conn); + if (rc) + return rc; +@@ -862,6 +865,9 @@ static int qed_ll2_lb_txq_completion(str + u16 new_idx = 0, num_bds = 0; + int rc; + ++ if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) ++ return 0; ++ + new_idx = le16_to_cpu(*p_tx->p_fw_cons); + num_bds = ((s16)new_idx - (s16)p_tx->bds_idx); + +@@ -1915,17 +1921,25 @@ int qed_ll2_terminate_connection(void *c + + /* Stop Tx & Rx of connection, if needed */ + if (QED_LL2_TX_REGISTERED(p_ll2_conn)) { ++ p_ll2_conn->tx_queue.b_cb_registred = false; ++ smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */ + rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn); + if (rc) + goto out; ++ + qed_ll2_txq_flush(p_hwfn, connection_handle); ++ qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index); + } + + if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { ++ p_ll2_conn->rx_queue.b_cb_registred = false; ++ smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */ + rc = qed_sp_ll2_rx_queue_stop(p_hwfn, 
p_ll2_conn); + if (rc) + goto out; ++ + qed_ll2_rxq_flush(p_hwfn, connection_handle); ++ qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index); + } + + if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) +@@ -1973,16 +1987,6 @@ void qed_ll2_release_connection(void *cx + if (!p_ll2_conn) + return; + +- if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { +- p_ll2_conn->rx_queue.b_cb_registred = false; +- qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index); +- } +- +- if (QED_LL2_TX_REGISTERED(p_ll2_conn)) { +- p_ll2_conn->tx_queue.b_cb_registred = false; +- qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index); +- } +- + kfree(p_ll2_conn->tx_queue.descq_mem); + qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain); + diff --git a/queue-4.16/qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch b/queue-4.16/qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch new file mode 100644 index 00000000000..0bff47d950c --- /dev/null +++ b/queue-4.16/qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch @@ -0,0 +1,92 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Michal Kalderon +Date: Wed, 16 May 2018 14:44:39 +0300 +Subject: qed: Fix possibility of list corruption during rmmod flows + +From: Michal Kalderon + +[ Upstream commit ffd2c0d12752a69e480366031ec7a7d723dd2510 ] + +The ll2 flows of flushing the txq/rxq need to be synchronized with the +regular fp processing. Caused list corruption during load/unload stress +tests. + +Fixes: 0a7fb11c23c0f ("qed: Add Light L2 support") +Signed-off-by: Ariel Elior +Signed-off-by: Michal Kalderon +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_ll2.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c +@@ -292,6 +292,7 @@ static void qed_ll2_txq_flush(struct qed + struct qed_ll2_tx_packet *p_pkt = NULL; + struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_tx_queue *p_tx; ++ unsigned long flags = 0; + dma_addr_t tx_frag; + + p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); +@@ -300,6 +301,7 @@ static void qed_ll2_txq_flush(struct qed + + p_tx = &p_ll2_conn->tx_queue; + ++ spin_lock_irqsave(&p_tx->lock, flags); + while (!list_empty(&p_tx->active_descq)) { + p_pkt = list_first_entry(&p_tx->active_descq, + struct qed_ll2_tx_packet, list_entry); +@@ -309,6 +311,7 @@ static void qed_ll2_txq_flush(struct qed + list_del(&p_pkt->list_entry); + b_last_packet = list_empty(&p_tx->active_descq); + list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); ++ spin_unlock_irqrestore(&p_tx->lock, flags); + if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) { + struct qed_ooo_buffer *p_buffer; + +@@ -328,7 +331,9 @@ static void qed_ll2_txq_flush(struct qed + b_last_frag, + b_last_packet); + } ++ spin_lock_irqsave(&p_tx->lock, flags); + } ++ spin_unlock_irqrestore(&p_tx->lock, flags); + } + + static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) +@@ -556,6 +561,7 @@ static void qed_ll2_rxq_flush(struct qed + struct qed_ll2_info *p_ll2_conn = NULL; + struct qed_ll2_rx_packet *p_pkt = NULL; + struct qed_ll2_rx_queue *p_rx; ++ unsigned long flags = 0; + + p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); + if (!p_ll2_conn) +@@ -563,13 +569,14 @@ static void qed_ll2_rxq_flush(struct qed + + p_rx = &p_ll2_conn->rx_queue; + ++ spin_lock_irqsave(&p_rx->lock, flags); + while (!list_empty(&p_rx->active_descq)) { + p_pkt = list_first_entry(&p_rx->active_descq, + struct 
qed_ll2_rx_packet, list_entry); + if (!p_pkt) + break; +- + list_move_tail(&p_pkt->list_entry, &p_rx->free_descq); ++ spin_unlock_irqrestore(&p_rx->lock, flags); + + if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) { + struct qed_ooo_buffer *p_buffer; +@@ -588,7 +595,9 @@ static void qed_ll2_rxq_flush(struct qed + cookie, + rx_buf_addr, b_last); + } ++ spin_lock_irqsave(&p_rx->lock, flags); + } ++ spin_unlock_irqrestore(&p_rx->lock, flags); + } + + static u8 qed_ll2_convert_rx_parse_to_tx_flags(u16 parse_flags) diff --git a/queue-4.16/qed-ll2-flush-isles-when-connection-is-closed.patch b/queue-4.16/qed-ll2-flush-isles-when-connection-is-closed.patch new file mode 100644 index 00000000000..b880e1a56d0 --- /dev/null +++ b/queue-4.16/qed-ll2-flush-isles-when-connection-is-closed.patch @@ -0,0 +1,63 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Michal Kalderon +Date: Wed, 16 May 2018 14:44:38 +0300 +Subject: qed: LL2 flush isles when connection is closed + +From: Michal Kalderon + +[ Upstream commit f9bcd60274a565751abef622f9018badd01a17c8 ] + +Driver should free all pending isles once it gets a FLUSH cqe from FW. +Part of iSCSI out of order flow. + +Fixes: 1d6cff4fca4366 ("qed: Add iSCSI out of order packet handling") +Signed-off-by: Ariel Elior +Signed-off-by: Michal Kalderon +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_ll2.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c +@@ -601,6 +601,27 @@ static u8 qed_ll2_convert_rx_parse_to_tx + return bd_flags; + } + ++static bool ++qed_ll2_lb_rxq_handler_slowpath(struct qed_hwfn *p_hwfn, ++ struct core_rx_slow_path_cqe *p_cqe) ++{ ++ struct ooo_opaque *iscsi_ooo; ++ u32 cid; ++ ++ if (p_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) ++ return false; ++ ++ iscsi_ooo = (struct ooo_opaque *)&p_cqe->opaque_data; ++ if (iscsi_ooo->ooo_opcode != TCP_EVENT_DELETE_ISLES) ++ return false; ++ ++ /* Need to make a flush */ ++ cid = le32_to_cpu(iscsi_ooo->cid); ++ qed_ooo_release_connection_isles(p_hwfn, p_hwfn->p_ooo_info, cid); ++ ++ return true; ++} ++ + static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) + { +@@ -627,6 +648,11 @@ static int qed_ll2_lb_rxq_handler(struct + cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); + cqe_type = cqe->rx_cqe_sp.type; + ++ if (cqe_type == CORE_RX_CQE_TYPE_SLOW_PATH) ++ if (qed_ll2_lb_rxq_handler_slowpath(p_hwfn, ++ &cqe->rx_cqe_sp)) ++ continue; ++ + if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) { + DP_NOTICE(p_hwfn, + "Got a non-regular LB LL2 completion [type 0x%02x]\n", diff --git a/queue-4.16/series b/queue-4.16/series index 86dec53404f..8f8fd865f00 100644 --- a/queue-4.16/series +++ b/queue-4.16/series @@ -1 +1,33 @@ net-mlx5-fix-build-break-when-config_smp-n.patch +net-fix-a-bug-in-removing-queues-from-xps-map.patch +net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch +net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch +net-sched-red-avoid-hashing-null-child.patch +net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch +net-test-tailroom-before-appending-to-linear-skb.patch 
+packet-in-packet_snd-start-writing-at-link-layer-allocation.patch +sock_diag-fix-use-after-free-read-in-__sk_free.patch +tcp-purge-write-queue-in-tcp_connect_init.patch +tun-fix-use-after-free-for-ptr_ring.patch +tuntap-fix-use-after-free-during-release.patch +cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch +net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch +net-dsa-do-not-register-devlink-for-unused-ports.patch +net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch +net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch +3c59x-convert-to-generic-dma-api.patch +cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch +vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch +vmxnet3-use-dma-memory-barriers-where-required.patch +net-ip6_gre-request-headroom-in-__gre6_xmit.patch +net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch +net-ip6_gre-split-up-ip6gre_tnl_link_config.patch +net-ip6_gre-split-up-ip6gre_tnl_change.patch +net-ip6_gre-split-up-ip6gre_newlink.patch +net-ip6_gre-split-up-ip6gre_changelink.patch +net-ip6_gre-fix-ip6erspan-hlen-calculation.patch +net-ip6_gre-fix-tunnel-metadata-device-sharing.patch +qed-ll2-flush-isles-when-connection-is-closed.patch +qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch +qed-fix-ll2-race-during-connection-terminate.patch +sparc-vio-use-put_device-instead-of-kfree.patch diff --git a/queue-4.16/sock_diag-fix-use-after-free-read-in-__sk_free.patch b/queue-4.16/sock_diag-fix-use-after-free-read-in-__sk_free.patch new file mode 100644 index 00000000000..ded9d8dc935 --- /dev/null +++ b/queue-4.16/sock_diag-fix-use-after-free-read-in-__sk_free.patch @@ -0,0 +1,128 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Eric Dumazet +Date: Fri, 18 May 2018 04:47:55 -0700 +Subject: sock_diag: fix use-after-free read in __sk_free + +From: Eric Dumazet + +[ Upstream commit 9709020c86f6bf8439ca3effc58cfca49a5de192 ] + +We must not call sock_diag_has_destroy_listeners(sk) on a 
socket +that has no reference on net structure. + +BUG: KASAN: use-after-free in sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline] +BUG: KASAN: use-after-free in __sk_free+0x329/0x340 net/core/sock.c:1609 +Read of size 8 at addr ffff88018a02e3a0 by task swapper/1/0 + +CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.17.0-rc5+ #54 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1b9/0x294 lib/dump_stack.c:113 + print_address_description+0x6c/0x20b mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 + sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline] + __sk_free+0x329/0x340 net/core/sock.c:1609 + sk_free+0x42/0x50 net/core/sock.c:1623 + sock_put include/net/sock.h:1664 [inline] + reqsk_free include/net/request_sock.h:116 [inline] + reqsk_put include/net/request_sock.h:124 [inline] + inet_csk_reqsk_queue_drop_and_put net/ipv4/inet_connection_sock.c:672 [inline] + reqsk_timer_handler+0xe27/0x10e0 net/ipv4/inet_connection_sock.c:739 + call_timer_fn+0x230/0x940 kernel/time/timer.c:1326 + expire_timers kernel/time/timer.c:1363 [inline] + __run_timers+0x79e/0xc50 kernel/time/timer.c:1666 + run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 + __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285 + invoke_softirq kernel/softirq.c:365 [inline] + irq_exit+0x1d1/0x200 kernel/softirq.c:405 + exiting_irq arch/x86/include/asm/apic.h:525 [inline] + smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052 + apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863 + +RIP: 0010:native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:54 +RSP: 0018:ffff8801d9ae7c38 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13 +RAX: dffffc0000000000 RBX: 1ffff1003b35cf8a RCX: 0000000000000000 +RDX: 
1ffffffff11a30d0 RSI: 0000000000000001 RDI: ffffffff88d18680 +RBP: ffff8801d9ae7c38 R08: ffffed003b5e46c3 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 +R13: ffff8801d9ae7cf0 R14: ffffffff897bef20 R15: 0000000000000000 + arch_safe_halt arch/x86/include/asm/paravirt.h:94 [inline] + default_idle+0xc2/0x440 arch/x86/kernel/process.c:354 + arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:345 + default_idle_call+0x6d/0x90 kernel/sched/idle.c:93 + cpuidle_idle_call kernel/sched/idle.c:153 [inline] + do_idle+0x395/0x560 kernel/sched/idle.c:262 + cpu_startup_entry+0x104/0x120 kernel/sched/idle.c:368 + start_secondary+0x426/0x5b0 arch/x86/kernel/smpboot.c:269 + secondary_startup_64+0xa5/0xb0 arch/x86/kernel/head_64.S:242 + +Allocated by task 4557: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 + kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 + kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554 + kmem_cache_zalloc include/linux/slab.h:691 [inline] + net_alloc net/core/net_namespace.c:383 [inline] + copy_net_ns+0x159/0x4c0 net/core/net_namespace.c:423 + create_new_namespaces+0x69d/0x8f0 kernel/nsproxy.c:107 + unshare_nsproxy_namespaces+0xc3/0x1f0 kernel/nsproxy.c:206 + ksys_unshare+0x708/0xf90 kernel/fork.c:2408 + __do_sys_unshare kernel/fork.c:2476 [inline] + __se_sys_unshare kernel/fork.c:2474 [inline] + __x64_sys_unshare+0x31/0x40 kernel/fork.c:2474 + do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Freed by task 69: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 + kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 + __cache_free mm/slab.c:3498 [inline] + kmem_cache_free+0x86/0x2d0 mm/slab.c:3756 + net_free net/core/net_namespace.c:399 [inline] + net_drop_ns.part.14+0x11a/0x130 net/core/net_namespace.c:406 + 
net_drop_ns net/core/net_namespace.c:405 [inline] + cleanup_net+0x6a1/0xb20 net/core/net_namespace.c:541 + process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145 + worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279 + kthread+0x345/0x410 kernel/kthread.c:240 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 + +The buggy address belongs to the object at ffff88018a02c140 + which belongs to the cache net_namespace of size 8832 +The buggy address is located 8800 bytes inside of + 8832-byte region [ffff88018a02c140, ffff88018a02e3c0) +The buggy address belongs to the page: +page:ffffea0006280b00 count:1 mapcount:0 mapping:ffff88018a02c140 index:0x0 compound_mapcount: 0 +flags: 0x2fffc0000008100(slab|head) +raw: 02fffc0000008100 ffff88018a02c140 0000000000000000 0000000100000001 +raw: ffffea00062a1320 ffffea0006268020 ffff8801d9bdde40 0000000000000000 +page dumped because: kasan: bad access detected + +Fixes: b922622ec6ef ("sock_diag: don't broadcast kernel sockets") +Signed-off-by: Eric Dumazet +Cc: Craig Gallek +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1603,7 +1603,7 @@ static void __sk_free(struct sock *sk) + if (likely(sk->sk_net_refcnt)) + sock_inuse_add(sock_net(sk), -1); + +- if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) ++ if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk))) + sock_diag_broadcast_destroy(sk); + else + sk_destruct(sk); diff --git a/queue-4.16/sparc-vio-use-put_device-instead-of-kfree.patch b/queue-4.16/sparc-vio-use-put_device-instead-of-kfree.patch new file mode 100644 index 00000000000..d8831d1e62e --- /dev/null +++ b/queue-4.16/sparc-vio-use-put_device-instead-of-kfree.patch @@ -0,0 +1,31 @@ +From 00ad691ab140b54ab9f5de5e74cb994f552e8124 Mon Sep 17 00:00:00 2001 +From: Arvind Yadav +Date: Wed, 25 Apr 2018 20:26:14 +0530 +Subject: sparc: vio: use put_device() instead of kfree() + +From: Arvind Yadav + +[ Upstream commit 00ad691ab140b54ab9f5de5e74cb994f552e8124 ] + +Never directly free @dev after calling device_register(), even +if it returned an error. Always use put_device() to give up the +reference initialized. + +Signed-off-by: Arvind Yadav +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/vio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/sparc/kernel/vio.c ++++ b/arch/sparc/kernel/vio.c +@@ -403,7 +403,7 @@ static struct vio_dev *vio_create_one(st + if (err) { + printk(KERN_ERR "VIO: Could not register device %s, err=%d\n", + dev_name(&vdev->dev), err); +- kfree(vdev); ++ put_device(&vdev->dev); + return NULL; + } + if (vdev->dp) diff --git a/queue-4.16/tcp-purge-write-queue-in-tcp_connect_init.patch b/queue-4.16/tcp-purge-write-queue-in-tcp_connect_init.patch new file mode 100644 index 00000000000..52cc1e10bfe --- /dev/null +++ b/queue-4.16/tcp-purge-write-queue-in-tcp_connect_init.patch @@ -0,0 +1,88 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Eric Dumazet +Date: Mon, 14 May 2018 21:14:26 -0700 +Subject: tcp: purge write queue in tcp_connect_init() + +From: Eric Dumazet + +[ Upstream commit 7f582b248d0a86bae5788c548d7bb5bca6f7691a ] + +syzkaller found a reliable way to crash the host, hitting a BUG() +in __tcp_retransmit_skb() + +Malicious MSG_FASTOPEN is the root cause. We need to purge write queue +in tcp_connect_init() at the point we init snd_una/write_seq. + +This patch also replaces the BUG() by a less intrusive WARN_ON_ONCE() + +kernel BUG at net/ipv4/tcp_output.c:2837! 
+invalid opcode: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 0 PID: 5276 Comm: syz-executor0 Not tainted 4.17.0-rc3+ #51 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:__tcp_retransmit_skb+0x2992/0x2eb0 net/ipv4/tcp_output.c:2837 +RSP: 0000:ffff8801dae06ff8 EFLAGS: 00010206 +RAX: ffff8801b9fe61c0 RBX: 00000000ffc18a16 RCX: ffffffff864e1a49 +RDX: 0000000000000100 RSI: ffffffff864e2e12 RDI: 0000000000000005 +RBP: ffff8801dae073a0 R08: ffff8801b9fe61c0 R09: ffffed0039c40dd2 +R10: ffffed0039c40dd2 R11: ffff8801ce206e93 R12: 00000000421eeaad +R13: ffff8801ce206d4e R14: ffff8801ce206cc0 R15: ffff8801cd4f4a80 +FS: 0000000000000000(0000) GS:ffff8801dae00000(0063) knlGS:00000000096bc900 +CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 +CR2: 0000000020000000 CR3: 00000001c47b6000 CR4: 00000000001406f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + tcp_retransmit_skb+0x2e/0x250 net/ipv4/tcp_output.c:2923 + tcp_retransmit_timer+0xc50/0x3060 net/ipv4/tcp_timer.c:488 + tcp_write_timer_handler+0x339/0x960 net/ipv4/tcp_timer.c:573 + tcp_write_timer+0x111/0x1d0 net/ipv4/tcp_timer.c:593 + call_timer_fn+0x230/0x940 kernel/time/timer.c:1326 + expire_timers kernel/time/timer.c:1363 [inline] + __run_timers+0x79e/0xc50 kernel/time/timer.c:1666 + run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 + __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285 + invoke_softirq kernel/softirq.c:365 [inline] + irq_exit+0x1d1/0x200 kernel/softirq.c:405 + exiting_irq arch/x86/include/asm/apic.h:525 [inline] + smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052 + apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863 + +Fixes: cf60af03ca4e ("net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)") +Signed-off-by: Eric Dumazet +Cc: Yuchung Cheng +Cc: Neal Cardwell +Reported-by: syzbot 
+Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2860,8 +2860,10 @@ int __tcp_retransmit_skb(struct sock *sk + return -EBUSY; + + if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { +- if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) +- BUG(); ++ if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { ++ WARN_ON_ONCE(1); ++ return -EINVAL; ++ } + if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) + return -ENOMEM; + } +@@ -3369,6 +3371,7 @@ static void tcp_connect_init(struct sock + sock_reset_flag(sk, SOCK_DONE); + tp->snd_wnd = 0; + tcp_init_wl(tp, 0); ++ tcp_write_queue_purge(sk); + tp->snd_una = tp->write_seq; + tp->snd_sml = tp->write_seq; + tp->snd_up = tp->write_seq; diff --git a/queue-4.16/tun-fix-use-after-free-for-ptr_ring.patch b/queue-4.16/tun-fix-use-after-free-for-ptr_ring.patch new file mode 100644 index 00000000000..dafcbea1b01 --- /dev/null +++ b/queue-4.16/tun-fix-use-after-free-for-ptr_ring.patch @@ -0,0 +1,116 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Jason Wang +Date: Fri, 11 May 2018 10:49:25 +0800 +Subject: tun: fix use after free for ptr_ring + +From: Jason Wang + +[ Upstream commit b196d88aba8ac72b775137854121097f4c4c6862 ] + +We used to initialize ptr_ring during TUNSETIFF, this is because its +size depends on the tx_queue_len of netdevice. And we try to clean it +up when the socket is detached from netdevice. A race was spotted when +trying to do uninit during a read which will lead to a use after free for +pointer ring. Solve this by always initializing a zero size ptr_ring +in open() and do resizing during TUNSETIFF, and then we can safely do +cleanup during close(). With this, there's no need for the workaround +that was introduced by commit 4df0bfc79904 ("tun: fix a memory leak +for tfile->tx_array"). 
+ +Reported-by: syzbot+e8b902c3c3fadf0a9dba@syzkaller.appspotmail.com +Cc: Eric Dumazet +Cc: Cong Wang +Cc: Michael S. Tsirkin +Fixes: 1576d9860599 ("tun: switch to use skb array for tx") +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 27 ++++++++++++--------------- + 1 file changed, 12 insertions(+), 15 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -680,15 +680,6 @@ static void tun_queue_purge(struct tun_f + skb_queue_purge(&tfile->sk.sk_error_queue); + } + +-static void tun_cleanup_tx_ring(struct tun_file *tfile) +-{ +- if (tfile->tx_ring.queue) { +- ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); +- xdp_rxq_info_unreg(&tfile->xdp_rxq); +- memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring)); +- } +-} +- + static void __tun_detach(struct tun_file *tfile, bool clean) + { + struct tun_file *ntfile; +@@ -735,7 +726,8 @@ static void __tun_detach(struct tun_file + tun->dev->reg_state == NETREG_REGISTERED) + unregister_netdevice(tun->dev); + } +- tun_cleanup_tx_ring(tfile); ++ if (tun) ++ xdp_rxq_info_unreg(&tfile->xdp_rxq); + sock_put(&tfile->sk); + } + } +@@ -775,14 +767,14 @@ static void tun_detach_all(struct net_de + tun_napi_del(tun, tfile); + /* Drop read queue */ + tun_queue_purge(tfile); ++ xdp_rxq_info_unreg(&tfile->xdp_rxq); + sock_put(&tfile->sk); +- tun_cleanup_tx_ring(tfile); + } + list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_enable_queue(tfile); + tun_queue_purge(tfile); ++ xdp_rxq_info_unreg(&tfile->xdp_rxq); + sock_put(&tfile->sk); +- tun_cleanup_tx_ring(tfile); + } + BUG_ON(tun->numdisabled != 0); + +@@ -826,7 +818,8 @@ static int tun_attach(struct tun_struct + } + + if (!tfile->detached && +- ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) { ++ ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len, ++ GFP_KERNEL, tun_ptr_free)) { + err = -ENOMEM; + goto out; + } +@@ -3131,6 +3124,11 @@ static 
int tun_chr_open(struct inode *in + &tun_proto, 0); + if (!tfile) + return -ENOMEM; ++ if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) { ++ sk_free(&tfile->sk); ++ return -ENOMEM; ++ } ++ + RCU_INIT_POINTER(tfile->tun, NULL); + tfile->flags = 0; + tfile->ifindex = 0; +@@ -3151,8 +3149,6 @@ static int tun_chr_open(struct inode *in + + sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); + +- memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring)); +- + return 0; + } + +@@ -3161,6 +3157,7 @@ static int tun_chr_close(struct inode *i + struct tun_file *tfile = file->private_data; + + tun_detach(tfile, true); ++ ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); + + return 0; + } diff --git a/queue-4.16/tuntap-fix-use-after-free-during-release.patch b/queue-4.16/tuntap-fix-use-after-free-during-release.patch new file mode 100644 index 00000000000..7cffdcc5df3 --- /dev/null +++ b/queue-4.16/tuntap-fix-use-after-free-during-release.patch @@ -0,0 +1,43 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: Jason Wang +Date: Wed, 16 May 2018 20:39:33 +0800 +Subject: tuntap: fix use after free during release + +From: Jason Wang + +[ Upstream commit 7063efd33bb15abc0160347f89eb5aba6b7d000e ] + +After commit b196d88aba8a ("tun: fix use after free for ptr_ring") we +need to clean up tx ring during release(). But unfortunately, it tries to +do the cleanup blindly after the socket was destroyed, which will lead to +another use-after-free. Fix this by doing the cleanup before dropping +the last reference of the socket in __tun_detach(). + +Reported-by: Andrei Vagin +Acked-by: Andrei Vagin +Fixes: b196d88aba8a ("tun: fix use after free for ptr_ring") +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -728,6 +728,7 @@ static void __tun_detach(struct tun_file + } + if (tun) + xdp_rxq_info_unreg(&tfile->xdp_rxq); ++ ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); + sock_put(&tfile->sk); + } + } +@@ -3157,7 +3158,6 @@ static int tun_chr_close(struct inode *i + struct tun_file *tfile = file->private_data; + + tun_detach(tfile, true); +- ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); + + return 0; + } diff --git a/queue-4.16/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch b/queue-4.16/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch new file mode 100644 index 00000000000..bd6ab6b324b --- /dev/null +++ b/queue-4.16/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch @@ -0,0 +1,153 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: "hpreg@vmware.com" +Date: Mon, 14 May 2018 08:14:34 -0400 +Subject: vmxnet3: set the DMA mask before the first DMA map operation + +From: "hpreg@vmware.com" + +[ Upstream commit 61aeecea40afb2b89933e27cd4adb10fc2e75cfd ] + +The DMA mask must be set before, not after, the first DMA map operation, or +the first DMA map operation could in theory fail on some systems. + +Fixes: b0eb57cb97e78 ("VMXNET3: Add support for virtual IOMMU") +Signed-off-by: Regis Duchesne +Acked-by: Ronak Doshi +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 50 +++++++++++++++++++------------------- + drivers/net/vmxnet3/vmxnet3_int.h | 8 +++--- + 2 files changed, 30 insertions(+), 28 deletions(-) + +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -2688,7 +2688,7 @@ vmxnet3_set_mac_addr(struct net_device * + /* ==================== initialization and cleanup routines ============ */ + + static int +-vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64) ++vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter) + { + int err; + unsigned long mmio_start, mmio_len; +@@ -2700,30 +2700,12 @@ vmxnet3_alloc_pci_resources(struct vmxne + return err; + } + +- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { +- if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) { +- dev_err(&pdev->dev, +- "pci_set_consistent_dma_mask failed\n"); +- err = -EIO; +- goto err_set_mask; +- } +- *dma64 = true; +- } else { +- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) { +- dev_err(&pdev->dev, +- "pci_set_dma_mask failed\n"); +- err = -EIO; +- goto err_set_mask; +- } +- *dma64 = false; +- } +- + err = pci_request_selected_regions(pdev, (1 << 2) - 1, + vmxnet3_driver_name); + if (err) { + dev_err(&pdev->dev, + "Failed to request region for adapter: error %d\n", err); +- goto err_set_mask; ++ goto err_enable_device; + } + + pci_set_master(pdev); +@@ -2751,7 +2733,7 @@ err_bar1: + iounmap(adapter->hw_addr0); + err_ioremap: + pci_release_selected_regions(pdev, (1 << 2) - 1); +-err_set_mask: ++err_enable_device: + pci_disable_device(pdev); + return err; + } +@@ -3254,7 +3236,7 @@ vmxnet3_probe_device(struct pci_dev *pde + #endif + }; + int err; +- bool dma64 = false; /* stupid gcc */ ++ bool dma64; + u32 ver; + struct net_device *netdev; + struct vmxnet3_adapter *adapter; +@@ -3300,6 +3282,24 @@ vmxnet3_probe_device(struct pci_dev *pde + adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE; + 
adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE; + ++ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { ++ if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) { ++ dev_err(&pdev->dev, ++ "pci_set_consistent_dma_mask failed\n"); ++ err = -EIO; ++ goto err_set_mask; ++ } ++ dma64 = true; ++ } else { ++ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) { ++ dev_err(&pdev->dev, ++ "pci_set_dma_mask failed\n"); ++ err = -EIO; ++ goto err_set_mask; ++ } ++ dma64 = false; ++ } ++ + spin_lock_init(&adapter->cmd_lock); + adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter, + sizeof(struct vmxnet3_adapter), +@@ -3307,7 +3307,7 @@ vmxnet3_probe_device(struct pci_dev *pde + if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) { + dev_err(&pdev->dev, "Failed to map dma\n"); + err = -EFAULT; +- goto err_dma_map; ++ goto err_set_mask; + } + adapter->shared = dma_alloc_coherent( + &adapter->pdev->dev, +@@ -3358,7 +3358,7 @@ vmxnet3_probe_device(struct pci_dev *pde + } + #endif /* VMXNET3_RSS */ + +- err = vmxnet3_alloc_pci_resources(adapter, &dma64); ++ err = vmxnet3_alloc_pci_resources(adapter); + if (err < 0) + goto err_alloc_pci; + +@@ -3504,7 +3504,7 @@ err_alloc_queue_desc: + err_alloc_shared: + dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa, + sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE); +-err_dma_map: ++err_set_mask: + free_netdev(netdev); + return err; + } +--- a/drivers/net/vmxnet3/vmxnet3_int.h ++++ b/drivers/net/vmxnet3/vmxnet3_int.h +@@ -69,10 +69,12 @@ + /* + * Version numbers + */ +-#define VMXNET3_DRIVER_VERSION_STRING "1.4.14.0-k" ++#define VMXNET3_DRIVER_VERSION_STRING "1.4.15.0-k" + +-/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ +-#define VMXNET3_DRIVER_VERSION_NUM 0x01040e00 ++/* Each byte of this 32-bit integer encodes a version number in ++ * VMXNET3_DRIVER_VERSION_STRING. 
++ */ ++#define VMXNET3_DRIVER_VERSION_NUM 0x01040f00 + + #if defined(CONFIG_PCI_MSI) + /* RSS only makes sense if MSI-X is supported. */ diff --git a/queue-4.16/vmxnet3-use-dma-memory-barriers-where-required.patch b/queue-4.16/vmxnet3-use-dma-memory-barriers-where-required.patch new file mode 100644 index 00000000000..b039e40e68e --- /dev/null +++ b/queue-4.16/vmxnet3-use-dma-memory-barriers-where-required.patch @@ -0,0 +1,91 @@ +From foo@baz Tue May 22 20:10:23 CEST 2018 +From: "hpreg@vmware.com" +Date: Mon, 14 May 2018 08:14:49 -0400 +Subject: vmxnet3: use DMA memory barriers where required + +From: "hpreg@vmware.com" + +[ Upstream commit f3002c1374fb2367c9d8dbb28852791ef90d2bac ] + +The gen bits must be read first from (resp. written last to) DMA memory. +The proper way to enforce this on Linux is to call dma_rmb() (resp. +dma_wmb()). + +Signed-off-by: Regis Duchesne +Acked-by: Ronak Doshi +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 22 ++++++++++++++++++++++ + drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- + 2 files changed, 24 insertions(+), 2 deletions(-) + +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx + + gdesc = tq->comp_ring.base + tq->comp_ring.next2proc; + while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) { ++ /* Prevent any &gdesc->tcd field from being (speculatively) ++ * read before (&gdesc->tcd)->gen is read. ++ */ ++ dma_rmb(); ++ + completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX( + &gdesc->tcd), tq, adapter->pdev, + adapter); +@@ -1103,6 +1108,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, str + gdesc->txd.tci = skb_vlan_tag_get(skb); + } + ++ /* Ensure that the write to (&gdesc->txd)->gen will be observed after ++ * all other writes to &gdesc->txd. ++ */ ++ dma_wmb(); ++ + /* finally flips the GEN bit of the SOP desc. 
*/ + gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ + VMXNET3_TXD_GEN); +@@ -1298,6 +1308,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx + */ + break; + } ++ ++ /* Prevent any rcd field from being (speculatively) read before ++ * rcd->gen is read. ++ */ ++ dma_rmb(); ++ + BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 && + rcd->rqID != rq->dataRingQid); + idx = rcd->rxdIdx; +@@ -1528,6 +1544,12 @@ rcd_done: + ring->next2comp = idx; + num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); + ring = rq->rx_ring + ring_idx; ++ ++ /* Ensure that the writes to rxd->gen bits will be observed ++ * after all other writes to rxd objects. ++ */ ++ dma_wmb(); ++ + while (num_to_alloc) { + vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, + &rxCmdDesc); +--- a/drivers/net/vmxnet3/vmxnet3_int.h ++++ b/drivers/net/vmxnet3/vmxnet3_int.h +@@ -69,12 +69,12 @@ + /* + * Version numbers + */ +-#define VMXNET3_DRIVER_VERSION_STRING "1.4.15.0-k" ++#define VMXNET3_DRIVER_VERSION_STRING "1.4.16.0-k" + + /* Each byte of this 32-bit integer encodes a version number in + * VMXNET3_DRIVER_VERSION_STRING. + */ +-#define VMXNET3_DRIVER_VERSION_NUM 0x01040f00 ++#define VMXNET3_DRIVER_VERSION_NUM 0x01041000 + + #if defined(CONFIG_PCI_MSI) + /* RSS only makes sense if MSI-X is supported. */ -- 2.47.2