git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.16-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 22 May 2018 18:11:42 +0000 (20:11 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 22 May 2018 18:11:42 +0000 (20:11 +0200)
added patches:
3c59x-convert-to-generic-dma-api.patch
cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch
cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch
net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch
net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch
net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch
net-dsa-do-not-register-devlink-for-unused-ports.patch
net-fix-a-bug-in-removing-queues-from-xps-map.patch
net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch
net-ip6_gre-fix-ip6erspan-hlen-calculation.patch
net-ip6_gre-fix-tunnel-metadata-device-sharing.patch
net-ip6_gre-request-headroom-in-__gre6_xmit.patch
net-ip6_gre-split-up-ip6gre_changelink.patch
net-ip6_gre-split-up-ip6gre_newlink.patch
net-ip6_gre-split-up-ip6gre_tnl_change.patch
net-ip6_gre-split-up-ip6gre_tnl_link_config.patch
net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch
net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch
net-sched-red-avoid-hashing-null-child.patch
net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch
net-test-tailroom-before-appending-to-linear-skb.patch
packet-in-packet_snd-start-writing-at-link-layer-allocation.patch
qed-fix-ll2-race-during-connection-terminate.patch
qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch
qed-ll2-flush-isles-when-connection-is-closed.patch
sock_diag-fix-use-after-free-read-in-__sk_free.patch
sparc-vio-use-put_device-instead-of-kfree.patch
tcp-purge-write-queue-in-tcp_connect_init.patch
tun-fix-use-after-free-for-ptr_ring.patch
tuntap-fix-use-after-free-during-release.patch
vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch
vmxnet3-use-dma-memory-barriers-where-required.patch

33 files changed:
queue-4.16/3c59x-convert-to-generic-dma-api.patch [new file with mode: 0644]
queue-4.16/cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch [new file with mode: 0644]
queue-4.16/cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch [new file with mode: 0644]
queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch [new file with mode: 0644]
queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch [new file with mode: 0644]
queue-4.16/net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch [new file with mode: 0644]
queue-4.16/net-dsa-do-not-register-devlink-for-unused-ports.patch [new file with mode: 0644]
queue-4.16/net-fix-a-bug-in-removing-queues-from-xps-map.patch [new file with mode: 0644]
queue-4.16/net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch [new file with mode: 0644]
queue-4.16/net-ip6_gre-fix-ip6erspan-hlen-calculation.patch [new file with mode: 0644]
queue-4.16/net-ip6_gre-fix-tunnel-metadata-device-sharing.patch [new file with mode: 0644]
queue-4.16/net-ip6_gre-request-headroom-in-__gre6_xmit.patch [new file with mode: 0644]
queue-4.16/net-ip6_gre-split-up-ip6gre_changelink.patch [new file with mode: 0644]
queue-4.16/net-ip6_gre-split-up-ip6gre_newlink.patch [new file with mode: 0644]
queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_change.patch [new file with mode: 0644]
queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_link_config.patch [new file with mode: 0644]
queue-4.16/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch [new file with mode: 0644]
queue-4.16/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch [new file with mode: 0644]
queue-4.16/net-sched-red-avoid-hashing-null-child.patch [new file with mode: 0644]
queue-4.16/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch [new file with mode: 0644]
queue-4.16/net-test-tailroom-before-appending-to-linear-skb.patch [new file with mode: 0644]
queue-4.16/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch [new file with mode: 0644]
queue-4.16/qed-fix-ll2-race-during-connection-terminate.patch [new file with mode: 0644]
queue-4.16/qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch [new file with mode: 0644]
queue-4.16/qed-ll2-flush-isles-when-connection-is-closed.patch [new file with mode: 0644]
queue-4.16/series
queue-4.16/sock_diag-fix-use-after-free-read-in-__sk_free.patch [new file with mode: 0644]
queue-4.16/sparc-vio-use-put_device-instead-of-kfree.patch [new file with mode: 0644]
queue-4.16/tcp-purge-write-queue-in-tcp_connect_init.patch [new file with mode: 0644]
queue-4.16/tun-fix-use-after-free-for-ptr_ring.patch [new file with mode: 0644]
queue-4.16/tuntap-fix-use-after-free-during-release.patch [new file with mode: 0644]
queue-4.16/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch [new file with mode: 0644]
queue-4.16/vmxnet3-use-dma-memory-barriers-where-required.patch [new file with mode: 0644]

diff --git a/queue-4.16/3c59x-convert-to-generic-dma-api.patch b/queue-4.16/3c59x-convert-to-generic-dma-api.patch
new file mode 100644 (file)
index 0000000..72b1607
--- /dev/null
@@ -0,0 +1,286 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Christoph Hellwig <hch@lst.de>
+Date: Sat, 12 May 2018 12:16:50 +0200
+Subject: 3c59x: convert to generic DMA API
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 55c82617c3e82210b7471e9334e8fc5df6a9961f ]
+
+This driver supports EISA devices in addition to PCI devices, and relied
+on the legacy behavior of the pci_dma* shims to pass on a NULL pointer
+to the DMA API, and the DMA API being able to handle that.  When the
+NULL forwarding broke the EISA support got broken.  Fix this by converting
+to the DMA API instead of the legacy PCI shims.
+
+Fixes: 4167b2ad ("PCI: Remove NULL device handling from PCI DMA API")
+Reported-by: tedheadster <tedheadster@gmail.com>
+Tested-by: tedheadster <tedheadster@gmail.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/3com/3c59x.c |  104 ++++++++++++++++++--------------------
+ 1 file changed, 51 insertions(+), 53 deletions(-)
+
+--- a/drivers/net/ethernet/3com/3c59x.c
++++ b/drivers/net/ethernet/3com/3c59x.c
+@@ -1212,9 +1212,9 @@ static int vortex_probe1(struct device *
+       vp->mii.reg_num_mask = 0x1f;
+       /* Makes sure rings are at least 16 byte aligned. */
+-      vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
++      vp->rx_ring = dma_alloc_coherent(gendev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
+                                          + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+-                                         &vp->rx_ring_dma);
++                                         &vp->rx_ring_dma, GFP_KERNEL);
+       retval = -ENOMEM;
+       if (!vp->rx_ring)
+               goto free_device;
+@@ -1476,11 +1476,10 @@ static int vortex_probe1(struct device *
+               return 0;
+ free_ring:
+-      pci_free_consistent(pdev,
+-                                              sizeof(struct boom_rx_desc) * RX_RING_SIZE
+-                                                      + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+-                                              vp->rx_ring,
+-                                              vp->rx_ring_dma);
++      dma_free_coherent(&pdev->dev,
++              sizeof(struct boom_rx_desc) * RX_RING_SIZE +
++              sizeof(struct boom_tx_desc) * TX_RING_SIZE,
++              vp->rx_ring, vp->rx_ring_dma);
+ free_device:
+       free_netdev(dev);
+       pr_err(PFX "vortex_probe1 fails.  Returns %d\n", retval);
+@@ -1751,9 +1750,9 @@ vortex_open(struct net_device *dev)
+                               break;                  /* Bad news!  */
+                       skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */
+-                      dma = pci_map_single(VORTEX_PCI(vp), skb->data,
+-                                           PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+-                      if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma))
++                      dma = dma_map_single(vp->gendev, skb->data,
++                                           PKT_BUF_SZ, DMA_FROM_DEVICE);
++                      if (dma_mapping_error(vp->gendev, dma))
+                               break;
+                       vp->rx_ring[i].addr = cpu_to_le32(dma);
+               }
+@@ -2067,9 +2066,9 @@ vortex_start_xmit(struct sk_buff *skb, s
+       if (vp->bus_master) {
+               /* Set the bus-master controller to transfer the packet. */
+               int len = (skb->len + 3) & ~3;
+-              vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len,
+-                                              PCI_DMA_TODEVICE);
+-              if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) {
++              vp->tx_skb_dma = dma_map_single(vp->gendev, skb->data, len,
++                                              DMA_TO_DEVICE);
++              if (dma_mapping_error(vp->gendev, vp->tx_skb_dma)) {
+                       dev_kfree_skb_any(skb);
+                       dev->stats.tx_dropped++;
+                       return NETDEV_TX_OK;
+@@ -2168,9 +2167,9 @@ boomerang_start_xmit(struct sk_buff *skb
+                       vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum);
+       if (!skb_shinfo(skb)->nr_frags) {
+-              dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len,
+-                                        PCI_DMA_TODEVICE);
+-              if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
++              dma_addr = dma_map_single(vp->gendev, skb->data, skb->len,
++                                        DMA_TO_DEVICE);
++              if (dma_mapping_error(vp->gendev, dma_addr))
+                       goto out_dma_err;
+               vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr);
+@@ -2178,9 +2177,9 @@ boomerang_start_xmit(struct sk_buff *skb
+       } else {
+               int i;
+-              dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data,
+-                                        skb_headlen(skb), PCI_DMA_TODEVICE);
+-              if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
++              dma_addr = dma_map_single(vp->gendev, skb->data,
++                                        skb_headlen(skb), DMA_TO_DEVICE);
++              if (dma_mapping_error(vp->gendev, dma_addr))
+                       goto out_dma_err;
+               vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr);
+@@ -2189,21 +2188,21 @@ boomerang_start_xmit(struct sk_buff *skb
+               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+-                      dma_addr = skb_frag_dma_map(&VORTEX_PCI(vp)->dev, frag,
++                      dma_addr = skb_frag_dma_map(vp->gendev, frag,
+                                                   0,
+                                                   frag->size,
+                                                   DMA_TO_DEVICE);
+-                      if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) {
++                      if (dma_mapping_error(vp->gendev, dma_addr)) {
+                               for(i = i-1; i >= 0; i--)
+-                                      dma_unmap_page(&VORTEX_PCI(vp)->dev,
++                                      dma_unmap_page(vp->gendev,
+                                                      le32_to_cpu(vp->tx_ring[entry].frag[i+1].addr),
+                                                      le32_to_cpu(vp->tx_ring[entry].frag[i+1].length),
+                                                      DMA_TO_DEVICE);
+-                              pci_unmap_single(VORTEX_PCI(vp),
++                              dma_unmap_single(vp->gendev,
+                                                le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
+                                                le32_to_cpu(vp->tx_ring[entry].frag[0].length),
+-                                               PCI_DMA_TODEVICE);
++                                               DMA_TO_DEVICE);
+                               goto out_dma_err;
+                       }
+@@ -2218,8 +2217,8 @@ boomerang_start_xmit(struct sk_buff *skb
+               }
+       }
+ #else
+-      dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len, PCI_DMA_TODEVICE);
+-      if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
++      dma_addr = dma_map_single(vp->gendev, skb->data, skb->len, DMA_TO_DEVICE);
++      if (dma_mapping_error(vp->gendev, dma_addr))
+               goto out_dma_err;
+       vp->tx_ring[entry].addr = cpu_to_le32(dma_addr);
+       vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG);
+@@ -2254,7 +2253,7 @@ boomerang_start_xmit(struct sk_buff *skb
+ out:
+       return NETDEV_TX_OK;
+ out_dma_err:
+-      dev_err(&VORTEX_PCI(vp)->dev, "Error mapping dma buffer\n");
++      dev_err(vp->gendev, "Error mapping dma buffer\n");
+       goto out;
+ }
+@@ -2322,7 +2321,7 @@ vortex_interrupt(int irq, void *dev_id)
+               if (status & DMADone) {
+                       if (ioread16(ioaddr + Wn7_MasterStatus) & 0x1000) {
+                               iowrite16(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */
+-                              pci_unmap_single(VORTEX_PCI(vp), vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE);
++                              dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE);
+                               pkts_compl++;
+                               bytes_compl += vp->tx_skb->len;
+                               dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */
+@@ -2459,19 +2458,19 @@ boomerang_interrupt(int irq, void *dev_i
+                                       struct sk_buff *skb = vp->tx_skbuff[entry];
+ #if DO_ZEROCOPY
+                                       int i;
+-                                      pci_unmap_single(VORTEX_PCI(vp),
++                                      dma_unmap_single(vp->gendev,
+                                                       le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
+                                                       le32_to_cpu(vp->tx_ring[entry].frag[0].length)&0xFFF,
+-                                                      PCI_DMA_TODEVICE);
++                                                      DMA_TO_DEVICE);
+                                       for (i=1; i<=skb_shinfo(skb)->nr_frags; i++)
+-                                                      pci_unmap_page(VORTEX_PCI(vp),
++                                                      dma_unmap_page(vp->gendev,
+                                                                                        le32_to_cpu(vp->tx_ring[entry].frag[i].addr),
+                                                                                        le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF,
+-                                                                                       PCI_DMA_TODEVICE);
++                                                                                       DMA_TO_DEVICE);
+ #else
+-                                      pci_unmap_single(VORTEX_PCI(vp),
+-                                              le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE);
++                                      dma_unmap_single(vp->gendev,
++                                              le32_to_cpu(vp->tx_ring[entry].addr), skb->len, DMA_TO_DEVICE);
+ #endif
+                                       pkts_compl++;
+                                       bytes_compl += skb->len;
+@@ -2561,14 +2560,14 @@ static int vortex_rx(struct net_device *
+                               /* 'skb_put()' points to the start of sk_buff data area. */
+                               if (vp->bus_master &&
+                                       ! (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)) {
+-                                      dma_addr_t dma = pci_map_single(VORTEX_PCI(vp), skb_put(skb, pkt_len),
+-                                                                         pkt_len, PCI_DMA_FROMDEVICE);
++                                      dma_addr_t dma = dma_map_single(vp->gendev, skb_put(skb, pkt_len),
++                                                                         pkt_len, DMA_FROM_DEVICE);
+                                       iowrite32(dma, ioaddr + Wn7_MasterAddr);
+                                       iowrite16((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen);
+                                       iowrite16(StartDMAUp, ioaddr + EL3_CMD);
+                                       while (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)
+                                               ;
+-                                      pci_unmap_single(VORTEX_PCI(vp), dma, pkt_len, PCI_DMA_FROMDEVICE);
++                                      dma_unmap_single(vp->gendev, dma, pkt_len, DMA_FROM_DEVICE);
+                               } else {
+                                       ioread32_rep(ioaddr + RX_FIFO,
+                                                    skb_put(skb, pkt_len),
+@@ -2635,11 +2634,11 @@ boomerang_rx(struct net_device *dev)
+                       if (pkt_len < rx_copybreak &&
+                           (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
+                               skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
+-                              pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
++                              dma_sync_single_for_cpu(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
+                               /* 'skb_put()' points to the start of sk_buff data area. */
+                               skb_put_data(skb, vp->rx_skbuff[entry]->data,
+                                            pkt_len);
+-                              pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
++                              dma_sync_single_for_device(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
+                               vp->rx_copy++;
+                       } else {
+                               /* Pre-allocate the replacement skb.  If it or its
+@@ -2651,9 +2650,9 @@ boomerang_rx(struct net_device *dev)
+                                       dev->stats.rx_dropped++;
+                                       goto clear_complete;
+                               }
+-                              newdma = pci_map_single(VORTEX_PCI(vp), newskb->data,
+-                                                      PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+-                              if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) {
++                              newdma = dma_map_single(vp->gendev, newskb->data,
++                                                      PKT_BUF_SZ, DMA_FROM_DEVICE);
++                              if (dma_mapping_error(vp->gendev, newdma)) {
+                                       dev->stats.rx_dropped++;
+                                       consume_skb(newskb);
+                                       goto clear_complete;
+@@ -2664,7 +2663,7 @@ boomerang_rx(struct net_device *dev)
+                               vp->rx_skbuff[entry] = newskb;
+                               vp->rx_ring[entry].addr = cpu_to_le32(newdma);
+                               skb_put(skb, pkt_len);
+-                              pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
++                              dma_unmap_single(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
+                               vp->rx_nocopy++;
+                       }
+                       skb->protocol = eth_type_trans(skb, dev);
+@@ -2761,8 +2760,8 @@ vortex_close(struct net_device *dev)
+       if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */
+               for (i = 0; i < RX_RING_SIZE; i++)
+                       if (vp->rx_skbuff[i]) {
+-                              pci_unmap_single(       VORTEX_PCI(vp), le32_to_cpu(vp->rx_ring[i].addr),
+-                                                                      PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
++                              dma_unmap_single(vp->gendev, le32_to_cpu(vp->rx_ring[i].addr),
++                                                                      PKT_BUF_SZ, DMA_FROM_DEVICE);
+                               dev_kfree_skb(vp->rx_skbuff[i]);
+                               vp->rx_skbuff[i] = NULL;
+                       }
+@@ -2775,12 +2774,12 @@ vortex_close(struct net_device *dev)
+                               int k;
+                               for (k=0; k<=skb_shinfo(skb)->nr_frags; k++)
+-                                              pci_unmap_single(VORTEX_PCI(vp),
++                                              dma_unmap_single(vp->gendev,
+                                                                                le32_to_cpu(vp->tx_ring[i].frag[k].addr),
+                                                                                le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF,
+-                                                                               PCI_DMA_TODEVICE);
++                                                                               DMA_TO_DEVICE);
+ #else
+-                              pci_unmap_single(VORTEX_PCI(vp), le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE);
++                              dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, DMA_TO_DEVICE);
+ #endif
+                               dev_kfree_skb(skb);
+                               vp->tx_skbuff[i] = NULL;
+@@ -3288,11 +3287,10 @@ static void vortex_remove_one(struct pci
+       pci_iounmap(pdev, vp->ioaddr);
+-      pci_free_consistent(pdev,
+-                                              sizeof(struct boom_rx_desc) * RX_RING_SIZE
+-                                                      + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+-                                              vp->rx_ring,
+-                                              vp->rx_ring_dma);
++      dma_free_coherent(&pdev->dev,
++                      sizeof(struct boom_rx_desc) * RX_RING_SIZE +
++                      sizeof(struct boom_tx_desc) * TX_RING_SIZE,
++                      vp->rx_ring, vp->rx_ring_dma);
+       pci_release_regions(pdev);
diff --git a/queue-4.16/cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch b/queue-4.16/cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch
new file mode 100644 (file)
index 0000000..90a4033
--- /dev/null
@@ -0,0 +1,137 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Kumar Sanghvi <kumaras@chelsio.com>
+Date: Mon, 14 May 2018 16:27:34 +0530
+Subject: cxgb4: Correct ntuple mask validation for hash filters
+
+From: Kumar Sanghvi <kumaras@chelsio.com>
+
+[ Upstream commit 849a742c59a3d597473c0232f9c2506c69eeef14 ]
+
+Earlier code of doing bitwise AND with field width bits was wrong.
+Instead, simplify code to calculate ntuple_mask based on supplied
+fields and then compare with mask configured in hw - which is the
+correct and simpler way to validate ntuple mask.
+
+Fixes: 3eb8b62d5a26 ("cxgb4: add support to create hash-filters via tc-flower offload")
+Signed-off-by: Kumar Sanghvi <kumaras@chelsio.com>
+Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c |   88 +++++++---------------
+ 1 file changed, 30 insertions(+), 58 deletions(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+@@ -836,7 +836,7 @@ bool is_filter_exact_match(struct adapte
+ {
+       struct tp_params *tp = &adap->params.tp;
+       u64 hash_filter_mask = tp->hash_filter_mask;
+-      u32 mask;
++      u64 ntuple_mask = 0;
+       if (!is_hashfilter(adap))
+               return false;
+@@ -865,73 +865,45 @@ bool is_filter_exact_match(struct adapte
+       if (!fs->val.fport || fs->mask.fport != 0xffff)
+               return false;
+-      if (tp->fcoe_shift >= 0) {
+-              mask = (hash_filter_mask >> tp->fcoe_shift) & FT_FCOE_W;
+-              if (mask && !fs->mask.fcoe)
+-                      return false;
+-      }
++      /* calculate tuple mask and compare with mask configured in hw */
++      if (tp->fcoe_shift >= 0)
++              ntuple_mask |= (u64)fs->mask.fcoe << tp->fcoe_shift;
+-      if (tp->port_shift >= 0) {
+-              mask = (hash_filter_mask >> tp->port_shift) & FT_PORT_W;
+-              if (mask && !fs->mask.iport)
+-                      return false;
+-      }
++      if (tp->port_shift >= 0)
++              ntuple_mask |= (u64)fs->mask.iport << tp->port_shift;
+       if (tp->vnic_shift >= 0) {
+-              mask = (hash_filter_mask >> tp->vnic_shift) & FT_VNIC_ID_W;
+-
+-              if ((adap->params.tp.ingress_config & VNIC_F)) {
+-                      if (mask && !fs->mask.pfvf_vld)
+-                              return false;
+-              } else {
+-                      if (mask && !fs->mask.ovlan_vld)
+-                              return false;
+-              }
++              if ((adap->params.tp.ingress_config & VNIC_F))
++                      ntuple_mask |= (u64)fs->mask.pfvf_vld << tp->vnic_shift;
++              else
++                      ntuple_mask |= (u64)fs->mask.ovlan_vld <<
++                              tp->vnic_shift;
+       }
+-      if (tp->vlan_shift >= 0) {
+-              mask = (hash_filter_mask >> tp->vlan_shift) & FT_VLAN_W;
+-              if (mask && !fs->mask.ivlan)
+-                      return false;
+-      }
++      if (tp->vlan_shift >= 0)
++              ntuple_mask |= (u64)fs->mask.ivlan << tp->vlan_shift;
+-      if (tp->tos_shift >= 0) {
+-              mask = (hash_filter_mask >> tp->tos_shift) & FT_TOS_W;
+-              if (mask && !fs->mask.tos)
+-                      return false;
+-      }
++      if (tp->tos_shift >= 0)
++              ntuple_mask |= (u64)fs->mask.tos << tp->tos_shift;
+-      if (tp->protocol_shift >= 0) {
+-              mask = (hash_filter_mask >> tp->protocol_shift) & FT_PROTOCOL_W;
+-              if (mask && !fs->mask.proto)
+-                      return false;
+-      }
++      if (tp->protocol_shift >= 0)
++              ntuple_mask |= (u64)fs->mask.proto << tp->protocol_shift;
+-      if (tp->ethertype_shift >= 0) {
+-              mask = (hash_filter_mask >> tp->ethertype_shift) &
+-                      FT_ETHERTYPE_W;
+-              if (mask && !fs->mask.ethtype)
+-                      return false;
+-      }
++      if (tp->ethertype_shift >= 0)
++              ntuple_mask |= (u64)fs->mask.ethtype << tp->ethertype_shift;
+-      if (tp->macmatch_shift >= 0) {
+-              mask = (hash_filter_mask >> tp->macmatch_shift) & FT_MACMATCH_W;
+-              if (mask && !fs->mask.macidx)
+-                      return false;
+-      }
++      if (tp->macmatch_shift >= 0)
++              ntuple_mask |= (u64)fs->mask.macidx << tp->macmatch_shift;
++
++      if (tp->matchtype_shift >= 0)
++              ntuple_mask |= (u64)fs->mask.matchtype << tp->matchtype_shift;
++
++      if (tp->frag_shift >= 0)
++              ntuple_mask |= (u64)fs->mask.frag << tp->frag_shift;
++
++      if (ntuple_mask != hash_filter_mask)
++              return false;
+-      if (tp->matchtype_shift >= 0) {
+-              mask = (hash_filter_mask >> tp->matchtype_shift) &
+-                      FT_MPSHITTYPE_W;
+-              if (mask && !fs->mask.matchtype)
+-                      return false;
+-      }
+-      if (tp->frag_shift >= 0) {
+-              mask = (hash_filter_mask >> tp->frag_shift) &
+-                      FT_FRAGMENTATION_W;
+-              if (mask && !fs->mask.frag)
+-                      return false;
+-      }
+       return true;
+ }
diff --git a/queue-4.16/cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch b/queue-4.16/cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch
new file mode 100644 (file)
index 0000000..8d74483
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Date: Fri, 18 May 2018 19:13:37 +0530
+Subject: cxgb4: fix offset in collecting TX rate limit info
+
+From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+
+[ Upstream commit d775f26b295a0a303f7a73d7da46e04296484fe7 ]
+
+Correct the indirect register offsets in collecting TX rate limit info
+in UP CIM logs.
+
+Also, T5 doesn't support these indirect register offsets, so remove
+them from collection logic.
+
+Fixes: be6e36d916b1 ("cxgb4: collect TX rate limit info in UP CIM logs")
+Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
+Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h |   28 +++++++---------------
+ 1 file changed, 9 insertions(+), 19 deletions(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
++++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
+@@ -419,15 +419,15 @@ static const u32 t6_up_cim_reg_array[][I
+       {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */
+       {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */
+       {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */
+-      {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */
+-      {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */
+-      {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */
+-      {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */
+-      {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */
+-      {0x7b50, 0x7b54, 0x2920, 0x10, 0x10}, /* up_cim_2920_to_2a10 */
+-      {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2a14 */
+-      {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */
+-      {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */
++      {0x7b50, 0x7b54, 0x4900, 0x4, 0x4}, /* up_cim_4900_to_4c60 */
++      {0x7b50, 0x7b54, 0x4904, 0x4, 0x4}, /* up_cim_4904_to_4c64 */
++      {0x7b50, 0x7b54, 0x4908, 0x4, 0x4}, /* up_cim_4908_to_4c68 */
++      {0x7b50, 0x7b54, 0x4910, 0x4, 0x4}, /* up_cim_4910_to_4c70 */
++      {0x7b50, 0x7b54, 0x4914, 0x4, 0x4}, /* up_cim_4914_to_4c74 */
++      {0x7b50, 0x7b54, 0x4920, 0x10, 0x10}, /* up_cim_4920_to_4a10 */
++      {0x7b50, 0x7b54, 0x4924, 0x10, 0x10}, /* up_cim_4924_to_4a14 */
++      {0x7b50, 0x7b54, 0x4928, 0x10, 0x10}, /* up_cim_4928_to_4a18 */
++      {0x7b50, 0x7b54, 0x492c, 0x10, 0x10}, /* up_cim_492c_to_4a1c */
+ };
+ static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = {
+@@ -444,16 +444,6 @@ static const u32 t5_up_cim_reg_array[][I
+       {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */
+       {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */
+       {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */
+-      {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */
+-      {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */
+-      {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */
+-      {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */
+-      {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */
+-      {0x7b50, 0x7b54, 0x2918, 0x4, 0x4}, /* up_cim_2918_to_3d54 */
+-      {0x7b50, 0x7b54, 0x291c, 0x4, 0x4}, /* up_cim_291c_to_3d58 */
+-      {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2914 */
+-      {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */
+-      {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */
+ };
+ static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = {
diff --git a/queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch b/queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch
new file mode 100644 (file)
index 0000000..b10c7d2
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 15 May 2018 16:01:25 -0700
+Subject: net: dsa: bcm_sf2: Fix IPv6 rule half deletion
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 1942adf64214df370350aa46954ba27654456f68 ]
+
+It was possible to delete only one half of an IPv6, which would leave
+the second half still programmed and possibly in use. Instead of
+checking for the unused bitmap, we need to check the unique bitmap, and
+refuse any deletion that does not match that criteria. We also need to
+move that check from bcm_sf2_cfp_rule_del_one() into its caller:
+bcm_sf2_cfp_rule_del() otherwise we would not be able to delete second
+halves anymore that would not pass the first test.
+
+Fixes: ba0696c22e7c ("net: dsa: bcm_sf2: Add support for IPv6 CFP rules")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2_cfp.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2_cfp.c
++++ b/drivers/net/dsa/bcm_sf2_cfp.c
+@@ -790,10 +790,6 @@ static int bcm_sf2_cfp_rule_del_one(stru
+       int ret;
+       u32 reg;
+-      /* Refuse deletion of unused rules, and the default reserved rule */
+-      if (!test_bit(loc, priv->cfp.used) || loc == 0)
+-              return -EINVAL;
+-
+       /* Indicate which rule we want to read */
+       bcm_sf2_cfp_rule_addr_set(priv, loc);
+@@ -831,6 +827,13 @@ static int bcm_sf2_cfp_rule_del(struct b
+       u32 next_loc = 0;
+       int ret;
++      /* Refuse deleting unused rules, and those that are not unique since
++       * that could leave IPv6 rules with one of the chained rule in the
++       * table.
++       */
++      if (!test_bit(loc, priv->cfp.unique) || loc == 0)
++              return -EINVAL;
++
+       ret = bcm_sf2_cfp_rule_del_one(priv, port, loc, &next_loc);
+       if (ret)
+               return ret;
diff --git a/queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch b/queue-4.16/net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch
new file mode 100644 (file)
index 0000000..a935dc3
--- /dev/null
@@ -0,0 +1,83 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 15 May 2018 16:01:24 -0700
+Subject: net: dsa: bcm_sf2: Fix IPv6 rules and chain ID
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 6c05561c541843b2bec2189f680bed6d20afc25b ]
+
+We had several issues that would make the programming of IPv6 rules both
+inconsistent and error prone:
+
+- the chain ID that we would be asking the hardware to put in the
+  packet's Broadcom tag would be off by one, it would return one of the
+  two indexes, but not the one user-space specified
+
+- when an user specified a particular location to insert a CFP rule at,
+  we would not be returning the same index, which would be confusing if
+  nothing else
+
+- finally, like IPv4, it would be possible to overflow the last entry by
+  re-programming it
+
+Fix this by swapping the usage of rule_index[0] and rule_index[1] where
+relevant in order to return a consistent and correct user-space
+experience.
+
+Fixes: ba0696c22e7c ("net: dsa: bcm_sf2: Add support for IPv6 CFP rules")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2_cfp.c |   20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2_cfp.c
++++ b/drivers/net/dsa/bcm_sf2_cfp.c
+@@ -565,19 +565,21 @@ static int bcm_sf2_cfp_ipv6_rule_set(str
+        * first half because the HW search is by incrementing addresses.
+        */
+       if (fs->location == RX_CLS_LOC_ANY)
+-              rule_index[0] = find_first_zero_bit(priv->cfp.used,
+-                                                  bcm_sf2_cfp_rule_size(priv));
++              rule_index[1] = find_first_zero_bit(priv->cfp.used,
++                                                  priv->num_cfp_rules);
+       else
+-              rule_index[0] = fs->location;
++              rule_index[1] = fs->location;
++      if (rule_index[1] > bcm_sf2_cfp_rule_size(priv))
++              return -ENOSPC;
+       /* Flag it as used (cleared on error path) such that we can immediately
+        * obtain a second one to chain from.
+        */
+-      set_bit(rule_index[0], priv->cfp.used);
++      set_bit(rule_index[1], priv->cfp.used);
+-      rule_index[1] = find_first_zero_bit(priv->cfp.used,
+-                                          bcm_sf2_cfp_rule_size(priv));
+-      if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) {
++      rule_index[0] = find_first_zero_bit(priv->cfp.used,
++                                          priv->num_cfp_rules);
++      if (rule_index[0] > bcm_sf2_cfp_rule_size(priv)) {
+               ret = -ENOSPC;
+               goto out_err;
+       }
+@@ -715,14 +717,14 @@ static int bcm_sf2_cfp_ipv6_rule_set(str
+       /* Flag the second half rule as being used now, return it as the
+        * location, and flag it as unique while dumping rules
+        */
+-      set_bit(rule_index[1], priv->cfp.used);
++      set_bit(rule_index[0], priv->cfp.used);
+       set_bit(rule_index[1], priv->cfp.unique);
+       fs->location = rule_index[1];
+       return ret;
+ out_err:
+-      clear_bit(rule_index[0], priv->cfp.used);
++      clear_bit(rule_index[1], priv->cfp.used);
+       return ret;
+ }
diff --git a/queue-4.16/net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch b/queue-4.16/net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch
new file mode 100644 (file)
index 0000000..c20f779
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 15 May 2018 16:01:23 -0700
+Subject: net: dsa: bcm_sf2: Fix RX_CLS_LOC_ANY overwrite for last rule
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 43a5e00f38fe8933a1c716bfe5b30e97f749d94b ]
+
+When we let the kernel pick up a rule location with RX_CLS_LOC_ANY, we
+would be able to overwrite the last rules because of a number of issues.
+
+The IPv4 code path would not be checking that rule_index is within
+bounds, and it would also only be allowed to pick up rules from range
+0..126 instead of the full 0..127 range. This would lead us to allow
+overwriting the last rule when we let the kernel pick-up the location.
+
+Fixes: 3306145866b6 ("net: dsa: bcm_sf2: Move IPv4 CFP processing to specific functions")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2_cfp.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/dsa/bcm_sf2_cfp.c
++++ b/drivers/net/dsa/bcm_sf2_cfp.c
+@@ -354,10 +354,13 @@ static int bcm_sf2_cfp_ipv4_rule_set(str
+       /* Locate the first rule available */
+       if (fs->location == RX_CLS_LOC_ANY)
+               rule_index = find_first_zero_bit(priv->cfp.used,
+-                                               bcm_sf2_cfp_rule_size(priv));
++                                               priv->num_cfp_rules);
+       else
+               rule_index = fs->location;
++      if (rule_index > bcm_sf2_cfp_rule_size(priv))
++              return -ENOSPC;
++
+       layout = &udf_tcpip4_layout;
+       /* We only use one UDF slice for now */
+       slice_num = bcm_sf2_get_slice_number(layout, 0);
diff --git a/queue-4.16/net-dsa-do-not-register-devlink-for-unused-ports.patch b/queue-4.16/net-dsa-do-not-register-devlink-for-unused-ports.patch
new file mode 100644 (file)
index 0000000..9eb5019
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 17 May 2018 16:55:39 -0700
+Subject: net: dsa: Do not register devlink for unused ports
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 5447d78623da2eded06d4cd9469d1a71eba43bc4 ]
+
+Even if commit 1d27732f411d ("net: dsa: setup and teardown ports") indicated
+that registering a devlink instance for unused ports is not a problem, and this
+is true, this can be confusing nonetheless, so let's not do it.
+
+Fixes: 1d27732f411d ("net: dsa: setup and teardown ports")
+Reported-by: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/dsa2.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/dsa/dsa2.c
++++ b/net/dsa/dsa2.c
+@@ -258,11 +258,13 @@ static void dsa_tree_teardown_default_cp
+ static int dsa_port_setup(struct dsa_port *dp)
+ {
+       struct dsa_switch *ds = dp->ds;
+-      int err;
++      int err = 0;
+       memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
+-      err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index);
++      if (dp->type != DSA_PORT_TYPE_UNUSED)
++              err = devlink_port_register(ds->devlink, &dp->devlink_port,
++                                          dp->index);
+       if (err)
+               return err;
+@@ -293,7 +295,8 @@ static int dsa_port_setup(struct dsa_por
+ static void dsa_port_teardown(struct dsa_port *dp)
+ {
+-      devlink_port_unregister(&dp->devlink_port);
++      if (dp->type != DSA_PORT_TYPE_UNUSED)
++              devlink_port_unregister(&dp->devlink_port);
+       switch (dp->type) {
+       case DSA_PORT_TYPE_UNUSED:
diff --git a/queue-4.16/net-fix-a-bug-in-removing-queues-from-xps-map.patch b/queue-4.16/net-fix-a-bug-in-removing-queues-from-xps-map.patch
new file mode 100644 (file)
index 0000000..12ee680
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Amritha Nambiar <amritha.nambiar@intel.com>
+Date: Thu, 17 May 2018 14:50:44 -0700
+Subject: net: Fix a bug in removing queues from XPS map
+
+From: Amritha Nambiar <amritha.nambiar@intel.com>
+
+[ Upstream commit 6358d49ac23995fdfe157cc8747ab0f274d3954b ]
+
+While removing queues from the XPS map, the individual CPU ID
+alone was used to index the CPUs map, this should be changed to also
+factor in the traffic class mapping for the CPU-to-queue lookup.
+
+Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes")
+Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
+Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2097,7 +2097,7 @@ static bool remove_xps_queue_cpu(struct
+               int i, j;
+               for (i = count, j = offset; i--; j++) {
+-                      if (!remove_xps_queue(dev_maps, cpu, j))
++                      if (!remove_xps_queue(dev_maps, tci, j))
+                               break;
+               }
diff --git a/queue-4.16/net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch b/queue-4.16/net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch
new file mode 100644 (file)
index 0000000..e4e9fa4
--- /dev/null
@@ -0,0 +1,193 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Petr Machata <petrm@mellanox.com>
+Date: Thu, 17 May 2018 16:36:15 +0200
+Subject: net: ip6_gre: Fix headroom request in ip6erspan_tunnel_xmit()
+
+From: Petr Machata <petrm@mellanox.com>
+
+[ Upstream commit 5691484df961aff897d824bcc26cd1a2aa036b5b ]
+
+dev->needed_headroom is not primed until ip6_tnl_xmit(), so it starts
+out zero. Thus the call to skb_cow_head() fails to actually make sure
+there's enough headroom to push the ERSPAN headers to. That can lead to
+the panic cited below. (Reproducer below that).
+
+Fix by requesting either needed_headroom if already primed, or just the
+bare minimum needed for the header otherwise.
+
+[  190.703567] kernel BUG at net/core/skbuff.c:104!
+[  190.708384] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
+[  190.714007] Modules linked in: act_mirred cls_matchall ip6_gre ip6_tunnel tunnel6 gre sch_ingress vrf veth x86_pkg_temp_thermal mlx_platform nfsd e1000e leds_mlxcpld
+[  190.728975] CPU: 1 PID: 959 Comm: kworker/1:2 Not tainted 4.17.0-rc4-net_master-custom-139 #10
+[  190.737647] Hardware name: Mellanox Technologies Ltd. "MSN2410-CB2F"/"SA000874", BIOS 4.6.5 03/08/2016
+[  190.747006] Workqueue: ipv6_addrconf addrconf_dad_work
+[  190.752222] RIP: 0010:skb_panic+0xc3/0x100
+[  190.756358] RSP: 0018:ffff8801d54072f0 EFLAGS: 00010282
+[  190.761629] RAX: 0000000000000085 RBX: ffff8801c1a8ecc0 RCX: 0000000000000000
+[  190.768830] RDX: 0000000000000085 RSI: dffffc0000000000 RDI: ffffed003aa80e54
+[  190.776025] RBP: ffff8801bd1ec5a0 R08: ffffed003aabce19 R09: ffffed003aabce19
+[  190.783226] R10: 0000000000000001 R11: ffffed003aabce18 R12: ffff8801bf695dbe
+[  190.790418] R13: 0000000000000084 R14: 00000000000006c0 R15: ffff8801bf695dc8
+[  190.797621] FS:  0000000000000000(0000) GS:ffff8801d5400000(0000) knlGS:0000000000000000
+[  190.805786] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  190.811582] CR2: 000055fa929aced0 CR3: 0000000003228004 CR4: 00000000001606e0
+[  190.818790] Call Trace:
+[  190.821264]  <IRQ>
+[  190.823314]  ? ip6erspan_tunnel_xmit+0x5e4/0x1982 [ip6_gre]
+[  190.828940]  ? ip6erspan_tunnel_xmit+0x5e4/0x1982 [ip6_gre]
+[  190.834562]  skb_push+0x78/0x90
+[  190.837749]  ip6erspan_tunnel_xmit+0x5e4/0x1982 [ip6_gre]
+[  190.843219]  ? ip6gre_tunnel_ioctl+0xd90/0xd90 [ip6_gre]
+[  190.848577]  ? debug_check_no_locks_freed+0x210/0x210
+[  190.853679]  ? debug_check_no_locks_freed+0x210/0x210
+[  190.858783]  ? print_irqtrace_events+0x120/0x120
+[  190.863451]  ? sched_clock_cpu+0x18/0x210
+[  190.867496]  ? cyc2ns_read_end+0x10/0x10
+[  190.871474]  ? skb_network_protocol+0x76/0x200
+[  190.875977]  dev_hard_start_xmit+0x137/0x770
+[  190.880317]  ? do_raw_spin_trylock+0x6d/0xa0
+[  190.884624]  sch_direct_xmit+0x2ef/0x5d0
+[  190.888589]  ? pfifo_fast_dequeue+0x3fa/0x670
+[  190.892994]  ? pfifo_fast_change_tx_queue_len+0x810/0x810
+[  190.898455]  ? __lock_is_held+0xa0/0x160
+[  190.902422]  __qdisc_run+0x39e/0xfc0
+[  190.906041]  ? _raw_spin_unlock+0x29/0x40
+[  190.910090]  ? pfifo_fast_enqueue+0x24b/0x3e0
+[  190.914501]  ? sch_direct_xmit+0x5d0/0x5d0
+[  190.918658]  ? pfifo_fast_dequeue+0x670/0x670
+[  190.923047]  ? __dev_queue_xmit+0x172/0x1770
+[  190.927365]  ? preempt_count_sub+0xf/0xd0
+[  190.931421]  __dev_queue_xmit+0x410/0x1770
+[  190.935553]  ? ___slab_alloc+0x605/0x930
+[  190.939524]  ? print_irqtrace_events+0x120/0x120
+[  190.944186]  ? memcpy+0x34/0x50
+[  190.947364]  ? netdev_pick_tx+0x1c0/0x1c0
+[  190.951428]  ? __skb_clone+0x2fd/0x3d0
+[  190.955218]  ? __copy_skb_header+0x270/0x270
+[  190.959537]  ? rcu_read_lock_sched_held+0x93/0xa0
+[  190.964282]  ? kmem_cache_alloc+0x344/0x4d0
+[  190.968520]  ? cyc2ns_read_end+0x10/0x10
+[  190.972495]  ? skb_clone+0x123/0x230
+[  190.976112]  ? skb_split+0x820/0x820
+[  190.979747]  ? tcf_mirred+0x554/0x930 [act_mirred]
+[  190.984582]  tcf_mirred+0x554/0x930 [act_mirred]
+[  190.989252]  ? tcf_mirred_act_wants_ingress.part.2+0x10/0x10 [act_mirred]
+[  190.996109]  ? __lock_acquire+0x706/0x26e0
+[  191.000239]  ? sched_clock_cpu+0x18/0x210
+[  191.004294]  tcf_action_exec+0xcf/0x2a0
+[  191.008179]  tcf_classify+0xfa/0x340
+[  191.011794]  __netif_receive_skb_core+0x8e1/0x1c60
+[  191.016630]  ? debug_check_no_locks_freed+0x210/0x210
+[  191.021732]  ? nf_ingress+0x500/0x500
+[  191.025458]  ? process_backlog+0x347/0x4b0
+[  191.029619]  ? print_irqtrace_events+0x120/0x120
+[  191.034302]  ? lock_acquire+0xd8/0x320
+[  191.038089]  ? process_backlog+0x1b6/0x4b0
+[  191.042246]  ? process_backlog+0xc2/0x4b0
+[  191.046303]  process_backlog+0xc2/0x4b0
+[  191.050189]  net_rx_action+0x5cc/0x980
+[  191.053991]  ? napi_complete_done+0x2c0/0x2c0
+[  191.058386]  ? mark_lock+0x13d/0xb40
+[  191.062001]  ? clockevents_program_event+0x6b/0x1d0
+[  191.066922]  ? print_irqtrace_events+0x120/0x120
+[  191.071593]  ? __lock_is_held+0xa0/0x160
+[  191.075566]  __do_softirq+0x1d4/0x9d2
+[  191.079282]  ? ip6_finish_output2+0x524/0x1460
+[  191.083771]  do_softirq_own_stack+0x2a/0x40
+[  191.087994]  </IRQ>
+[  191.090130]  do_softirq.part.13+0x38/0x40
+[  191.094178]  __local_bh_enable_ip+0x135/0x190
+[  191.098591]  ip6_finish_output2+0x54d/0x1460
+[  191.102916]  ? ip6_forward_finish+0x2f0/0x2f0
+[  191.107314]  ? ip6_mtu+0x3c/0x2c0
+[  191.110674]  ? ip6_finish_output+0x2f8/0x650
+[  191.114992]  ? ip6_output+0x12a/0x500
+[  191.118696]  ip6_output+0x12a/0x500
+[  191.122223]  ? ip6_route_dev_notify+0x5b0/0x5b0
+[  191.126807]  ? ip6_finish_output+0x650/0x650
+[  191.131120]  ? ip6_fragment+0x1a60/0x1a60
+[  191.135182]  ? icmp6_dst_alloc+0x26e/0x470
+[  191.139317]  mld_sendpack+0x672/0x830
+[  191.143021]  ? igmp6_mcf_seq_next+0x2f0/0x2f0
+[  191.147429]  ? __local_bh_enable_ip+0x77/0x190
+[  191.151913]  ipv6_mc_dad_complete+0x47/0x90
+[  191.156144]  addrconf_dad_completed+0x561/0x720
+[  191.160731]  ? addrconf_rs_timer+0x3a0/0x3a0
+[  191.165036]  ? mark_held_locks+0xc9/0x140
+[  191.169095]  ? __local_bh_enable_ip+0x77/0x190
+[  191.173570]  ? addrconf_dad_work+0x50d/0xa20
+[  191.177886]  ? addrconf_dad_work+0x529/0xa20
+[  191.182194]  addrconf_dad_work+0x529/0xa20
+[  191.186342]  ? addrconf_dad_completed+0x720/0x720
+[  191.191088]  ? __lock_is_held+0xa0/0x160
+[  191.195059]  ? process_one_work+0x45d/0xe20
+[  191.199302]  ? process_one_work+0x51e/0xe20
+[  191.203531]  ? rcu_read_lock_sched_held+0x93/0xa0
+[  191.208279]  process_one_work+0x51e/0xe20
+[  191.212340]  ? pwq_dec_nr_in_flight+0x200/0x200
+[  191.216912]  ? get_lock_stats+0x4b/0xf0
+[  191.220788]  ? preempt_count_sub+0xf/0xd0
+[  191.224844]  ? worker_thread+0x219/0x860
+[  191.228823]  ? do_raw_spin_trylock+0x6d/0xa0
+[  191.233142]  worker_thread+0xeb/0x860
+[  191.236848]  ? process_one_work+0xe20/0xe20
+[  191.241095]  kthread+0x206/0x300
+[  191.244352]  ? process_one_work+0xe20/0xe20
+[  191.248587]  ? kthread_stop+0x570/0x570
+[  191.252459]  ret_from_fork+0x3a/0x50
+[  191.256082] Code: 14 3e ff 8b 4b 78 55 4d 89 f9 41 56 41 55 48 c7 c7 a0 cf db 82 41 54 44 8b 44 24 2c 48 8b 54 24 30 48 8b 74 24 20 e8 16 94 13 ff <0f> 0b 48 c7 c7 60 8e 1f 85 48 83 c4 20 e8 55 ef a6 ff 89 74 24
+[  191.275327] RIP: skb_panic+0xc3/0x100 RSP: ffff8801d54072f0
+[  191.281024] ---[ end trace 7ea51094e099e006 ]---
+[  191.285724] Kernel panic - not syncing: Fatal exception in interrupt
+[  191.292168] Kernel Offset: disabled
+[  191.295697] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
+
+Reproducer:
+
+       ip link add h1 type veth peer name swp1
+       ip link add h3 type veth peer name swp3
+
+       ip link set dev h1 up
+       ip address add 192.0.2.1/28 dev h1
+
+       ip link add dev vh3 type vrf table 20
+       ip link set dev h3 master vh3
+       ip link set dev vh3 up
+       ip link set dev h3 up
+
+       ip link set dev swp3 up
+       ip address add dev swp3 2001:db8:2::1/64
+
+       ip link set dev swp1 up
+       tc qdisc add dev swp1 clsact
+
+       ip link add name gt6 type ip6erspan \
+               local 2001:db8:2::1 remote 2001:db8:2::2 oseq okey 123
+       ip link set dev gt6 up
+
+       sleep 1
+
+       tc filter add dev swp1 ingress pref 1000 matchall skip_hw \
+               action mirred egress mirror dev gt6
+       ping -I h1 192.0.2.2
+
+Fixes: e41c7c68ea77 ("ip6erspan: make sure enough headroom at xmit.")
+Signed-off-by: Petr Machata <petrm@mellanox.com>
+Acked-by: William Tu <u9012063@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -908,7 +908,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+               truncate = true;
+       }
+-      if (skb_cow_head(skb, dev->needed_headroom))
++      if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
+               goto tx_err;
+       t->parms.o_flags &= ~TUNNEL_KEY;
diff --git a/queue-4.16/net-ip6_gre-fix-ip6erspan-hlen-calculation.patch b/queue-4.16/net-ip6_gre-fix-ip6erspan-hlen-calculation.patch
new file mode 100644 (file)
index 0000000..2d8d2ba
--- /dev/null
@@ -0,0 +1,151 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Petr Machata <petrm@mellanox.com>
+Date: Thu, 17 May 2018 16:36:51 +0200
+Subject: net: ip6_gre: Fix ip6erspan hlen calculation
+
+From: Petr Machata <petrm@mellanox.com>
+
+[ Upstream commit 2d665034f239412927b1e71329f20f001c92da09 ]
+
+Even though ip6erspan_tap_init() sets up hlen and tun_hlen according to
+what ERSPAN needs, it goes ahead to call ip6gre_tnl_link_config() which
+overwrites these settings with GRE-specific ones.
+
+Similarly for changelink callbacks, which are handled by
+ip6gre_changelink() calls ip6gre_tnl_change() calls
+ip6gre_tnl_link_config() as well.
+
+The difference ends up being 12 vs. 20 bytes, and this is generally not
+a problem, because a 12-byte request likely ends up allocating more and
+the extra 8 bytes are thus available. However correct it is not.
+
+So replace the newlink and changelink callbacks with an ERSPAN-specific
+ones, reusing the newly-introduced _common() functions.
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Petr Machata <petrm@mellanox.com>
+Acked-by: William Tu <u9012063@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |   74 ++++++++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 65 insertions(+), 9 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -81,6 +81,7 @@ static int ip6gre_tunnel_init(struct net
+ static void ip6gre_tunnel_setup(struct net_device *dev);
+ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
+ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
++static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+ /* Tunnel hash table */
+@@ -1746,6 +1747,19 @@ static const struct net_device_ops ip6gr
+       .ndo_get_iflink = ip6_tnl_get_iflink,
+ };
++static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel)
++{
++      int t_hlen;
++
++      tunnel->tun_hlen = 8;
++      tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
++                     erspan_hdr_len(tunnel->parms.erspan_ver);
++
++      t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
++      tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
++      return t_hlen;
++}
++
+ static int ip6erspan_tap_init(struct net_device *dev)
+ {
+       struct ip6_tnl *tunnel;
+@@ -1769,12 +1783,7 @@ static int ip6erspan_tap_init(struct net
+               return ret;
+       }
+-      tunnel->tun_hlen = 8;
+-      tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+-                     erspan_hdr_len(tunnel->parms.erspan_ver);
+-      t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+-
+-      dev->hard_header_len = LL_MAX_HEADER + t_hlen;
++      t_hlen = ip6erspan_calc_hlen(tunnel);
+       dev->mtu = ETH_DATA_LEN - t_hlen;
+       if (dev->type == ARPHRD_ETHER)
+               dev->mtu -= ETH_HLEN;
+@@ -1783,7 +1792,7 @@ static int ip6erspan_tap_init(struct net
+       dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+       tunnel = netdev_priv(dev);
+-      ip6gre_tnl_link_config(tunnel, 1);
++      ip6erspan_tnl_link_config(tunnel, 1);
+       return 0;
+ }
+@@ -2108,6 +2117,53 @@ static void ip6erspan_tap_setup(struct n
+       netif_keep_dst(dev);
+ }
++static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
++                           struct nlattr *tb[], struct nlattr *data[],
++                           struct netlink_ext_ack *extack)
++{
++      int err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
++      struct ip6_tnl *nt = netdev_priv(dev);
++      struct net *net = dev_net(dev);
++
++      if (!err) {
++              ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]);
++              ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
++      }
++      return err;
++}
++
++static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu)
++{
++      ip6gre_tnl_link_config_common(t);
++      ip6gre_tnl_link_config_route(t, set_mtu, ip6erspan_calc_hlen(t));
++}
++
++static int ip6erspan_tnl_change(struct ip6_tnl *t,
++                              const struct __ip6_tnl_parm *p, int set_mtu)
++{
++      ip6gre_tnl_copy_tnl_parm(t, p);
++      ip6erspan_tnl_link_config(t, set_mtu);
++      return 0;
++}
++
++static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
++                              struct nlattr *data[],
++                              struct netlink_ext_ack *extack)
++{
++      struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
++      struct __ip6_tnl_parm p;
++      struct ip6_tnl *t;
++
++      t = ip6gre_changelink_common(dev, tb, data, &p, extack);
++      if (IS_ERR(t))
++              return PTR_ERR(t);
++
++      ip6gre_tunnel_unlink(ign, t);
++      ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
++      ip6gre_tunnel_link(ign, t);
++      return 0;
++}
++
+ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
+       .kind           = "ip6gre",
+       .maxtype        = IFLA_GRE_MAX,
+@@ -2144,8 +2200,8 @@ static struct rtnl_link_ops ip6erspan_ta
+       .priv_size      = sizeof(struct ip6_tnl),
+       .setup          = ip6erspan_tap_setup,
+       .validate       = ip6erspan_tap_validate,
+-      .newlink        = ip6gre_newlink,
+-      .changelink     = ip6gre_changelink,
++      .newlink        = ip6erspan_newlink,
++      .changelink     = ip6erspan_changelink,
+       .get_size       = ip6gre_get_size,
+       .fill_info      = ip6gre_fill_info,
+       .get_link_net   = ip6_tnl_get_link_net,
diff --git a/queue-4.16/net-ip6_gre-fix-tunnel-metadata-device-sharing.patch b/queue-4.16/net-ip6_gre-fix-tunnel-metadata-device-sharing.patch
new file mode 100644 (file)
index 0000000..4683e05
--- /dev/null
@@ -0,0 +1,248 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: William Tu <u9012063@gmail.com>
+Date: Fri, 18 May 2018 19:22:28 -0700
+Subject: net: ip6_gre: fix tunnel metadata device sharing.
+
+From: William Tu <u9012063@gmail.com>
+
+[ Upstream commit b80d0b93b991e551a32157e0d9d38fc5bc9348a7 ]
+
+Currently ip6gre and ip6erspan share single metadata mode device,
+using 'collect_md_tun'.  Thus, when doing:
+  ip link add dev ip6gre11 type ip6gretap external
+  ip link add dev ip6erspan12 type ip6erspan external
+  RTNETLINK answers: File exists
+simply fails due to the 2nd tries to create the same collect_md_tun.
+
+The patch fixes it by adding a separate collect md tunnel device
+for the ip6erspan, 'collect_md_tun_erspan'.  As a result, a couple
+of places need to refactor/split up in order to distinguish ip6gre
+and ip6erspan.
+
+First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
+create separate function {ip6gre,ip6ersapn}_tunnel_{link_md,unlink_md}.
+Then before link/unlink, make sure the link_md/unlink_md is called.
+Finally, a separate ndo_uninit is created for ip6erspan.  Tested it
+using the samples/bpf/test_tunnel_bpf.sh.
+
+Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
+Signed-off-by: William Tu <u9012063@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |  101 +++++++++++++++++++++++++++++++++++++++++------------
+ 1 file changed, 79 insertions(+), 22 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -71,6 +71,7 @@ struct ip6gre_net {
+       struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
+       struct ip6_tnl __rcu *collect_md_tun;
++      struct ip6_tnl __rcu *collect_md_tun_erspan;
+       struct net_device *fb_tunnel_dev;
+ };
+@@ -233,7 +234,12 @@ static struct ip6_tnl *ip6gre_tunnel_loo
+       if (cand)
+               return cand;
+-      t = rcu_dereference(ign->collect_md_tun);
++      if (gre_proto == htons(ETH_P_ERSPAN) ||
++          gre_proto == htons(ETH_P_ERSPAN2))
++              t = rcu_dereference(ign->collect_md_tun_erspan);
++      else
++              t = rcu_dereference(ign->collect_md_tun);
++
+       if (t && t->dev->flags & IFF_UP)
+               return t;
+@@ -262,6 +268,31 @@ static struct ip6_tnl __rcu **__ip6gre_b
+       return &ign->tunnels[prio][h];
+ }
++static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
++{
++      if (t->parms.collect_md)
++              rcu_assign_pointer(ign->collect_md_tun, t);
++}
++
++static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
++{
++      if (t->parms.collect_md)
++              rcu_assign_pointer(ign->collect_md_tun_erspan, t);
++}
++
++static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t)
++{
++      if (t->parms.collect_md)
++              rcu_assign_pointer(ign->collect_md_tun, NULL);
++}
++
++static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign,
++                                     struct ip6_tnl *t)
++{
++      if (t->parms.collect_md)
++              rcu_assign_pointer(ign->collect_md_tun_erspan, NULL);
++}
++
+ static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
+               const struct ip6_tnl *t)
+ {
+@@ -272,9 +303,6 @@ static void ip6gre_tunnel_link(struct ip
+ {
+       struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+-      if (t->parms.collect_md)
+-              rcu_assign_pointer(ign->collect_md_tun, t);
+-
+       rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+       rcu_assign_pointer(*tp, t);
+ }
+@@ -284,9 +312,6 @@ static void ip6gre_tunnel_unlink(struct
+       struct ip6_tnl __rcu **tp;
+       struct ip6_tnl *iter;
+-      if (t->parms.collect_md)
+-              rcu_assign_pointer(ign->collect_md_tun, NULL);
+-
+       for (tp = ip6gre_bucket(ign, t);
+            (iter = rtnl_dereference(*tp)) != NULL;
+            tp = &iter->next) {
+@@ -375,11 +400,23 @@ failed_free:
+       return NULL;
+ }
++static void ip6erspan_tunnel_uninit(struct net_device *dev)
++{
++      struct ip6_tnl *t = netdev_priv(dev);
++      struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
++
++      ip6erspan_tunnel_unlink_md(ign, t);
++      ip6gre_tunnel_unlink(ign, t);
++      dst_cache_reset(&t->dst_cache);
++      dev_put(dev);
++}
++
+ static void ip6gre_tunnel_uninit(struct net_device *dev)
+ {
+       struct ip6_tnl *t = netdev_priv(dev);
+       struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
++      ip6gre_tunnel_unlink_md(ign, t);
+       ip6gre_tunnel_unlink(ign, t);
+       dst_cache_reset(&t->dst_cache);
+       dev_put(dev);
+@@ -1799,7 +1836,7 @@ static int ip6erspan_tap_init(struct net
+ static const struct net_device_ops ip6erspan_netdev_ops = {
+       .ndo_init =             ip6erspan_tap_init,
+-      .ndo_uninit =           ip6gre_tunnel_uninit,
++      .ndo_uninit =           ip6erspan_tunnel_uninit,
+       .ndo_start_xmit =       ip6erspan_tunnel_xmit,
+       .ndo_set_mac_address =  eth_mac_addr,
+       .ndo_validate_addr =    eth_validate_addr,
+@@ -1862,8 +1899,6 @@ static int ip6gre_newlink_common(struct
+                                struct netlink_ext_ack *extack)
+ {
+       struct ip6_tnl *nt;
+-      struct net *net = dev_net(dev);
+-      struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+       struct ip_tunnel_encap ipencap;
+       int err;
+@@ -1876,16 +1911,6 @@ static int ip6gre_newlink_common(struct
+                       return err;
+       }
+-      ip6gre_netlink_parms(data, &nt->parms);
+-
+-      if (nt->parms.collect_md) {
+-              if (rtnl_dereference(ign->collect_md_tun))
+-                      return -EEXIST;
+-      } else {
+-              if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+-                      return -EEXIST;
+-      }
+-
+       if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
+               eth_hw_addr_random(dev);
+@@ -1909,12 +1934,26 @@ static int ip6gre_newlink(struct net *sr
+                         struct nlattr *tb[], struct nlattr *data[],
+                         struct netlink_ext_ack *extack)
+ {
+-      int err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+       struct ip6_tnl *nt = netdev_priv(dev);
+       struct net *net = dev_net(dev);
++      struct ip6gre_net *ign;
++      int err;
++
++      ip6gre_netlink_parms(data, &nt->parms);
++      ign = net_generic(net, ip6gre_net_id);
++
++      if (nt->parms.collect_md) {
++              if (rtnl_dereference(ign->collect_md_tun))
++                      return -EEXIST;
++      } else {
++              if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
++                      return -EEXIST;
++      }
++      err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+       if (!err) {
+               ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
++              ip6gre_tunnel_link_md(ign, nt);
+               ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
+       }
+       return err;
+@@ -1966,8 +2005,10 @@ static int ip6gre_changelink(struct net_
+       if (IS_ERR(t))
+               return PTR_ERR(t);
++      ip6gre_tunnel_unlink_md(ign, t);
+       ip6gre_tunnel_unlink(ign, t);
+       ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
++      ip6gre_tunnel_link_md(ign, t);
+       ip6gre_tunnel_link(ign, t);
+       return 0;
+ }
+@@ -2121,12 +2162,26 @@ static int ip6erspan_newlink(struct net
+                            struct nlattr *tb[], struct nlattr *data[],
+                            struct netlink_ext_ack *extack)
+ {
+-      int err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+       struct ip6_tnl *nt = netdev_priv(dev);
+       struct net *net = dev_net(dev);
++      struct ip6gre_net *ign;
++      int err;
++
++      ip6gre_netlink_parms(data, &nt->parms);
++      ign = net_generic(net, ip6gre_net_id);
++
++      if (nt->parms.collect_md) {
++              if (rtnl_dereference(ign->collect_md_tun_erspan))
++                      return -EEXIST;
++      } else {
++              if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
++                      return -EEXIST;
++      }
++      err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+       if (!err) {
+               ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]);
++              ip6erspan_tunnel_link_md(ign, nt);
+               ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
+       }
+       return err;
+@@ -2158,8 +2213,10 @@ static int ip6erspan_changelink(struct n
+       if (IS_ERR(t))
+               return PTR_ERR(t);
++      ip6gre_tunnel_unlink_md(ign, t);
+       ip6gre_tunnel_unlink(ign, t);
+       ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
++      ip6erspan_tunnel_link_md(ign, t);
+       ip6gre_tunnel_link(ign, t);
+       return 0;
+ }
diff --git a/queue-4.16/net-ip6_gre-request-headroom-in-__gre6_xmit.patch b/queue-4.16/net-ip6_gre-request-headroom-in-__gre6_xmit.patch
new file mode 100644 (file)
index 0000000..80c87ec
--- /dev/null
@@ -0,0 +1,155 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Petr Machata <petrm@mellanox.com>
+Date: Thu, 17 May 2018 16:36:10 +0200
+Subject: net: ip6_gre: Request headroom in __gre6_xmit()
+
+From: Petr Machata <petrm@mellanox.com>
+
+[ Upstream commit 01b8d064d58b4c1f0eff47f8fe8a8508cb3b3840 ]
+
+__gre6_xmit() pushes GRE headers before handing over to ip6_tnl_xmit()
+for generic IP-in-IP processing. However it doesn't make sure that there
+is enough headroom to push the header to. That can lead to the panic
+cited below. (Reproducer below that).
+
+Fix by requesting either needed_headroom if already primed, or just the
+bare minimum needed for the header otherwise.
+
+[  158.576725] kernel BUG at net/core/skbuff.c:104!
+[  158.581510] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
+[  158.587174] Modules linked in: act_mirred cls_matchall ip6_gre ip6_tunnel tunnel6 gre sch_ingress vrf veth x86_pkg_temp_thermal mlx_platform nfsd e1000e leds_mlxcpld
+[  158.602268] CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 4.17.0-rc4-net_master-custom-139 #10
+[  158.610938] Hardware name: Mellanox Technologies Ltd. "MSN2410-CB2F"/"SA000874", BIOS 4.6.5 03/08/2016
+[  158.620426] RIP: 0010:skb_panic+0xc3/0x100
+[  158.624586] RSP: 0018:ffff8801d3f27110 EFLAGS: 00010286
+[  158.629882] RAX: 0000000000000082 RBX: ffff8801c02cc040 RCX: 0000000000000000
+[  158.637127] RDX: 0000000000000082 RSI: dffffc0000000000 RDI: ffffed003a7e4e18
+[  158.644366] RBP: ffff8801bfec8020 R08: ffffed003aabce19 R09: ffffed003aabce19
+[  158.651574] R10: 000000000000000b R11: ffffed003aabce18 R12: ffff8801c364de66
+[  158.658786] R13: 000000000000002c R14: 00000000000000c0 R15: ffff8801c364de68
+[  158.666007] FS:  0000000000000000(0000) GS:ffff8801d5400000(0000) knlGS:0000000000000000
+[  158.674212] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  158.680036] CR2: 00007f4b3702dcd0 CR3: 0000000003228002 CR4: 00000000001606e0
+[  158.687228] Call Trace:
+[  158.689752]  ? __gre6_xmit+0x246/0xd80 [ip6_gre]
+[  158.694475]  ? __gre6_xmit+0x246/0xd80 [ip6_gre]
+[  158.699141]  skb_push+0x78/0x90
+[  158.702344]  __gre6_xmit+0x246/0xd80 [ip6_gre]
+[  158.706872]  ip6gre_tunnel_xmit+0x3bc/0x610 [ip6_gre]
+[  158.711992]  ? __gre6_xmit+0xd80/0xd80 [ip6_gre]
+[  158.716668]  ? debug_check_no_locks_freed+0x210/0x210
+[  158.721761]  ? print_irqtrace_events+0x120/0x120
+[  158.726461]  ? sched_clock_cpu+0x18/0x210
+[  158.730572]  ? sched_clock_cpu+0x18/0x210
+[  158.734692]  ? cyc2ns_read_end+0x10/0x10
+[  158.738705]  ? skb_network_protocol+0x76/0x200
+[  158.743216]  ? netif_skb_features+0x1b2/0x550
+[  158.747648]  dev_hard_start_xmit+0x137/0x770
+[  158.752010]  sch_direct_xmit+0x2ef/0x5d0
+[  158.755992]  ? pfifo_fast_dequeue+0x3fa/0x670
+[  158.760460]  ? pfifo_fast_change_tx_queue_len+0x810/0x810
+[  158.765975]  ? __lock_is_held+0xa0/0x160
+[  158.770002]  __qdisc_run+0x39e/0xfc0
+[  158.773673]  ? _raw_spin_unlock+0x29/0x40
+[  158.777781]  ? pfifo_fast_enqueue+0x24b/0x3e0
+[  158.782191]  ? sch_direct_xmit+0x5d0/0x5d0
+[  158.786372]  ? pfifo_fast_dequeue+0x670/0x670
+[  158.790818]  ? __dev_queue_xmit+0x172/0x1770
+[  158.795195]  ? preempt_count_sub+0xf/0xd0
+[  158.799313]  __dev_queue_xmit+0x410/0x1770
+[  158.803512]  ? ___slab_alloc+0x605/0x930
+[  158.807525]  ? ___slab_alloc+0x605/0x930
+[  158.811540]  ? memcpy+0x34/0x50
+[  158.814768]  ? netdev_pick_tx+0x1c0/0x1c0
+[  158.818895]  ? __skb_clone+0x2fd/0x3d0
+[  158.822712]  ? __copy_skb_header+0x270/0x270
+[  158.827079]  ? rcu_read_lock_sched_held+0x93/0xa0
+[  158.831903]  ? kmem_cache_alloc+0x344/0x4d0
+[  158.836199]  ? skb_clone+0x123/0x230
+[  158.839869]  ? skb_split+0x820/0x820
+[  158.843521]  ? tcf_mirred+0x554/0x930 [act_mirred]
+[  158.848407]  tcf_mirred+0x554/0x930 [act_mirred]
+[  158.853104]  ? tcf_mirred_act_wants_ingress.part.2+0x10/0x10 [act_mirred]
+[  158.860005]  ? __lock_acquire+0x706/0x26e0
+[  158.864162]  ? mark_lock+0x13d/0xb40
+[  158.867832]  tcf_action_exec+0xcf/0x2a0
+[  158.871736]  tcf_classify+0xfa/0x340
+[  158.875402]  __netif_receive_skb_core+0x8e1/0x1c60
+[  158.880334]  ? nf_ingress+0x500/0x500
+[  158.884059]  ? process_backlog+0x347/0x4b0
+[  158.888241]  ? lock_acquire+0xd8/0x320
+[  158.892050]  ? process_backlog+0x1b6/0x4b0
+[  158.896228]  ? process_backlog+0xc2/0x4b0
+[  158.900291]  process_backlog+0xc2/0x4b0
+[  158.904210]  net_rx_action+0x5cc/0x980
+[  158.908047]  ? napi_complete_done+0x2c0/0x2c0
+[  158.912525]  ? rcu_read_unlock+0x80/0x80
+[  158.916534]  ? __lock_is_held+0x34/0x160
+[  158.920541]  __do_softirq+0x1d4/0x9d2
+[  158.924308]  ? trace_event_raw_event_irq_handler_exit+0x140/0x140
+[  158.930515]  run_ksoftirqd+0x1d/0x40
+[  158.934152]  smpboot_thread_fn+0x32b/0x690
+[  158.938299]  ? sort_range+0x20/0x20
+[  158.941842]  ? preempt_count_sub+0xf/0xd0
+[  158.945940]  ? schedule+0x5b/0x140
+[  158.949412]  kthread+0x206/0x300
+[  158.952689]  ? sort_range+0x20/0x20
+[  158.956249]  ? kthread_stop+0x570/0x570
+[  158.960164]  ret_from_fork+0x3a/0x50
+[  158.963823] Code: 14 3e ff 8b 4b 78 55 4d 89 f9 41 56 41 55 48 c7 c7 a0 cf db 82 41 54 44 8b 44 24 2c 48 8b 54 24 30 48 8b 74 24 20 e8 16 94 13 ff <0f> 0b 48 c7 c7 60 8e 1f 85 48 83 c4 20 e8 55 ef a6 ff 89 74 24
+[  158.983235] RIP: skb_panic+0xc3/0x100 RSP: ffff8801d3f27110
+[  158.988935] ---[ end trace 5af56ee845aa6cc8 ]---
+[  158.993641] Kernel panic - not syncing: Fatal exception in interrupt
+[  159.000176] Kernel Offset: disabled
+[  159.003767] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
+
+Reproducer:
+
+       ip link add h1 type veth peer name swp1
+       ip link add h3 type veth peer name swp3
+
+       ip link set dev h1 up
+       ip address add 192.0.2.1/28 dev h1
+
+       ip link add dev vh3 type vrf table 20
+       ip link set dev h3 master vh3
+       ip link set dev vh3 up
+       ip link set dev h3 up
+
+       ip link set dev swp3 up
+       ip address add dev swp3 2001:db8:2::1/64
+
+       ip link set dev swp1 up
+       tc qdisc add dev swp1 clsact
+
+       ip link add name gt6 type ip6gretap \
+               local 2001:db8:2::1 remote 2001:db8:2::2
+       ip link set dev gt6 up
+
+       sleep 1
+
+       tc filter add dev swp1 ingress pref 1000 matchall skip_hw \
+               action mirred egress mirror dev gt6
+       ping -I h1 192.0.2.2
+
+Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
+Signed-off-by: Petr Machata <petrm@mellanox.com>
+Acked-by: William Tu <u9012063@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -701,6 +701,9 @@ static netdev_tx_t __gre6_xmit(struct sk
+       if (tunnel->parms.o_flags & TUNNEL_SEQ)
+               tunnel->o_seqno++;
++      if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
++              return -ENOMEM;
++
+       /* Push GRE header. */
+       protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
diff --git a/queue-4.16/net-ip6_gre-split-up-ip6gre_changelink.patch b/queue-4.16/net-ip6_gre-split-up-ip6gre_changelink.patch
new file mode 100644 (file)
index 0000000..6fc6da6
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Petr Machata <petrm@mellanox.com>
+Date: Thu, 17 May 2018 16:36:45 +0200
+Subject: net: ip6_gre: Split up ip6gre_changelink()
+
+From: Petr Machata <petrm@mellanox.com>
+
+[ Upstream commit c8632fc30bb03aa0c3bd7bcce85355a10feb8149 ]
+
+Extract from ip6gre_changelink() a reusable function
+ip6gre_changelink_common(). This will allow introduction of
+ERSPAN-specific _changelink() function with not a lot of code
+duplication.
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Petr Machata <petrm@mellanox.com>
+Acked-by: William Tu <u9012063@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |   33 ++++++++++++++++++++++++---------
+ 1 file changed, 24 insertions(+), 9 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1911,37 +1911,52 @@ static int ip6gre_newlink(struct net *sr
+       return err;
+ }
+-static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
+-                           struct nlattr *data[],
+-                           struct netlink_ext_ack *extack)
++static struct ip6_tnl *
++ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[],
++                       struct nlattr *data[], struct __ip6_tnl_parm *p_p,
++                       struct netlink_ext_ack *extack)
+ {
+       struct ip6_tnl *t, *nt = netdev_priv(dev);
+       struct net *net = nt->net;
+       struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+-      struct __ip6_tnl_parm p;
+       struct ip_tunnel_encap ipencap;
+       if (dev == ign->fb_tunnel_dev)
+-              return -EINVAL;
++              return ERR_PTR(-EINVAL);
+       if (ip6gre_netlink_encap_parms(data, &ipencap)) {
+               int err = ip6_tnl_encap_setup(nt, &ipencap);
+               if (err < 0)
+-                      return err;
++                      return ERR_PTR(err);
+       }
+-      ip6gre_netlink_parms(data, &p);
++      ip6gre_netlink_parms(data, p_p);
+-      t = ip6gre_tunnel_locate(net, &p, 0);
++      t = ip6gre_tunnel_locate(net, p_p, 0);
+       if (t) {
+               if (t->dev != dev)
+-                      return -EEXIST;
++                      return ERR_PTR(-EEXIST);
+       } else {
+               t = nt;
+       }
++      return t;
++}
++
++static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
++                           struct nlattr *data[],
++                           struct netlink_ext_ack *extack)
++{
++      struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
++      struct __ip6_tnl_parm p;
++      struct ip6_tnl *t;
++
++      t = ip6gre_changelink_common(dev, tb, data, &p, extack);
++      if (IS_ERR(t))
++              return PTR_ERR(t);
++
+       ip6gre_tunnel_unlink(ign, t);
+       ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+       ip6gre_tunnel_link(ign, t);
diff --git a/queue-4.16/net-ip6_gre-split-up-ip6gre_newlink.patch b/queue-4.16/net-ip6_gre-split-up-ip6gre_newlink.patch
new file mode 100644 (file)
index 0000000..d6010d9
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Petr Machata <petrm@mellanox.com>
+Date: Thu, 17 May 2018 16:36:39 +0200
+Subject: net: ip6_gre: Split up ip6gre_newlink()
+
+From: Petr Machata <petrm@mellanox.com>
+
+[ Upstream commit 7fa38a7c852ec99e3a7fc375eb2c21c50c2e46b8 ]
+
+Extract from ip6gre_newlink() a reusable function
+ip6gre_newlink_common(). The ip6gre_tnl_link_config() call needs to be
+made customizable for ERSPAN, thus reorder it with calls to
+ip6_tnl_change_mtu() and dev_hold(), and extract the whole tail to the
+caller, ip6gre_newlink(). Thus enable an ERSPAN-specific _newlink()
+function without a lot of duplicity.
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Petr Machata <petrm@mellanox.com>
+Acked-by: William Tu <u9012063@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |   24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1848,9 +1848,9 @@ static bool ip6gre_netlink_encap_parms(s
+       return ret;
+ }
+-static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
+-                        struct nlattr *tb[], struct nlattr *data[],
+-                        struct netlink_ext_ack *extack)
++static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
++                               struct nlattr *tb[], struct nlattr *data[],
++                               struct netlink_ext_ack *extack)
+ {
+       struct ip6_tnl *nt;
+       struct net *net = dev_net(dev);
+@@ -1887,18 +1887,30 @@ static int ip6gre_newlink(struct net *sr
+       if (err)
+               goto out;
+-      ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+-
+       if (tb[IFLA_MTU])
+               ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+       dev_hold(dev);
+-      ip6gre_tunnel_link(ign, nt);
+ out:
+       return err;
+ }
++static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
++                        struct nlattr *tb[], struct nlattr *data[],
++                        struct netlink_ext_ack *extack)
++{
++      int err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
++      struct ip6_tnl *nt = netdev_priv(dev);
++      struct net *net = dev_net(dev);
++
++      if (!err) {
++              ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
++              ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
++      }
++      return err;
++}
++
+ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
+                            struct nlattr *data[],
+                            struct netlink_ext_ack *extack)
diff --git a/queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_change.patch b/queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_change.patch
new file mode 100644 (file)
index 0000000..0d07dbc
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Petr Machata <petrm@mellanox.com>
+Date: Thu, 17 May 2018 16:36:33 +0200
+Subject: net: ip6_gre: Split up ip6gre_tnl_change()
+
+From: Petr Machata <petrm@mellanox.com>
+
+[ Upstream commit a6465350ef495f5cbd76a3e505d25a01d648477e ]
+
+Split a reusable function ip6gre_tnl_copy_tnl_parm() from
+ip6gre_tnl_change(). This will allow ERSPAN-specific code to
+reuse the common parts while customizing the behavior for ERSPAN.
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Petr Machata <petrm@mellanox.com>
+Acked-by: William Tu <u9012063@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1106,8 +1106,8 @@ static void ip6gre_tnl_link_config(struc
+       ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t));
+ }
+-static int ip6gre_tnl_change(struct ip6_tnl *t,
+-      const struct __ip6_tnl_parm *p, int set_mtu)
++static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
++                                   const struct __ip6_tnl_parm *p)
+ {
+       t->parms.laddr = p->laddr;
+       t->parms.raddr = p->raddr;
+@@ -1123,6 +1123,12 @@ static int ip6gre_tnl_change(struct ip6_
+       t->parms.o_flags = p->o_flags;
+       t->parms.fwmark = p->fwmark;
+       dst_cache_reset(&t->dst_cache);
++}
++
++static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p,
++                           int set_mtu)
++{
++      ip6gre_tnl_copy_tnl_parm(t, p);
+       ip6gre_tnl_link_config(t, set_mtu);
+       return 0;
+ }
diff --git a/queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_link_config.patch b/queue-4.16/net-ip6_gre-split-up-ip6gre_tnl_link_config.patch
new file mode 100644 (file)
index 0000000..a32525c
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Petr Machata <petrm@mellanox.com>
+Date: Thu, 17 May 2018 16:36:27 +0200
+Subject: net: ip6_gre: Split up ip6gre_tnl_link_config()
+
+From: Petr Machata <petrm@mellanox.com>
+
+[ Upstream commit a483373ead61e6079bc8ebe27e2dfdb2e3c1559f ]
+
+The function ip6gre_tnl_link_config() is used for setting up
+configuration of both ip6gretap and ip6erspan tunnels. Split the
+function into the common part and the route-lookup part. The latter then
+takes the calculated header length as an argument. This split will allow
+the patches down the line to sneak in a custom header length computation
+for the ERSPAN tunnel.
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Petr Machata <petrm@mellanox.com>
+Acked-by: William Tu <u9012063@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |   38 ++++++++++++++++++++++++++------------
+ 1 file changed, 26 insertions(+), 12 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1019,12 +1019,11 @@ tx_err:
+       return NETDEV_TX_OK;
+ }
+-static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
++static void ip6gre_tnl_link_config_common(struct ip6_tnl *t)
+ {
+       struct net_device *dev = t->dev;
+       struct __ip6_tnl_parm *p = &t->parms;
+       struct flowi6 *fl6 = &t->fl.u.ip6;
+-      int t_hlen;
+       if (dev->type != ARPHRD_ETHER) {
+               memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+@@ -1051,12 +1050,13 @@ static void ip6gre_tnl_link_config(struc
+               dev->flags |= IFF_POINTOPOINT;
+       else
+               dev->flags &= ~IFF_POINTOPOINT;
++}
+-      t->tun_hlen = gre_calc_hlen(t->parms.o_flags);
+-
+-      t->hlen = t->encap_hlen + t->tun_hlen;
+-
+-      t_hlen = t->hlen + sizeof(struct ipv6hdr);
++static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu,
++                                       int t_hlen)
++{
++      const struct __ip6_tnl_parm *p = &t->parms;
++      struct net_device *dev = t->dev;
+       if (p->flags & IP6_TNL_F_CAP_XMIT) {
+               int strict = (ipv6_addr_type(&p->raddr) &
+@@ -1088,6 +1088,24 @@ static void ip6gre_tnl_link_config(struc
+       }
+ }
++static int ip6gre_calc_hlen(struct ip6_tnl *tunnel)
++{
++      int t_hlen;
++
++      tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
++      tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
++
++      t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
++      tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
++      return t_hlen;
++}
++
++static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
++{
++      ip6gre_tnl_link_config_common(t);
++      ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t));
++}
++
+ static int ip6gre_tnl_change(struct ip6_tnl *t,
+       const struct __ip6_tnl_parm *p, int set_mtu)
+ {
+@@ -1381,11 +1399,7 @@ static int ip6gre_tunnel_init_common(str
+               return ret;
+       }
+-      tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+-      tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+-      t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+-
+-      dev->hard_header_len = LL_MAX_HEADER + t_hlen;
++      t_hlen = ip6gre_calc_hlen(tunnel);
+       dev->mtu = ETH_DATA_LEN - t_hlen;
+       if (dev->type == ARPHRD_ETHER)
+               dev->mtu -= ETH_HLEN;
diff --git a/queue-4.16/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch b/queue-4.16/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch
new file mode 100644 (file)
index 0000000..fb87ec5
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Tarick Bedeir <tarick@google.com>
+Date: Sun, 13 May 2018 16:38:45 -0700
+Subject: net/mlx4_core: Fix error handling in mlx4_init_port_info.
+
+From: Tarick Bedeir <tarick@google.com>
+
+[ Upstream commit 57f6f99fdad9984801cde05c1db68fe39b474a10 ]
+
+Avoid exiting the function with a lingering sysfs file (if the first
+call to device_create_file() fails while the second succeeds), and avoid
+calling devlink_port_unregister() twice.
+
+In other words, either mlx4_init_port_info() succeeds and returns zero, or
+it fails, returns non-zero, and requires no cleanup.
+
+Fixes: 096335b3f983 ("mlx4_core: Allow dynamic MTU configuration for IB ports")
+Signed-off-by: Tarick Bedeir <tarick@google.com>
+Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/main.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/main.c
++++ b/drivers/net/ethernet/mellanox/mlx4/main.c
+@@ -3007,6 +3007,7 @@ static int mlx4_init_port_info(struct ml
+               mlx4_err(dev, "Failed to create file for port %d\n", port);
+               devlink_port_unregister(&info->devlink_port);
+               info->port = -1;
++              return err;
+       }
+       sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
+@@ -3028,9 +3029,10 @@ static int mlx4_init_port_info(struct ml
+                                  &info->port_attr);
+               devlink_port_unregister(&info->devlink_port);
+               info->port = -1;
++              return err;
+       }
+-      return err;
++      return 0;
+ }
+ static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
diff --git a/queue-4.16/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch b/queue-4.16/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch
new file mode 100644 (file)
index 0000000..141c7f0
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Wed, 16 May 2018 12:54:29 +0200
+Subject: net/sched: fix refcnt leak in the error path of tcf_vlan_init()
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit 5a4931ae0193f8a4a97e8260fd0df1d705d83299 ]
+
+Similarly to what was done with commit a52956dfc503 ("net sched actions:
+fix refcnt leak in skbmod"), fix the error path of tcf_vlan_init() to avoid
+refcnt leaks when wrong value of TCA_VLAN_PUSH_VLAN_PROTOCOL is given.
+
+Fixes: 5026c9b1bafc ("net sched: vlan action fix late binding")
+CC: Roman Mashak <mrv@mojatatu.com>
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_vlan.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/sched/act_vlan.c
++++ b/net/sched/act_vlan.c
+@@ -161,6 +161,8 @@ static int tcf_vlan_init(struct net *net
+                       case htons(ETH_P_8021AD):
+                               break;
+                       default:
++                              if (exists)
++                                      tcf_idr_release(*a, bind);
+                               return -EPROTONOSUPPORT;
+                       }
+               } else {
diff --git a/queue-4.16/net-sched-red-avoid-hashing-null-child.patch b/queue-4.16/net-sched-red-avoid-hashing-null-child.patch
new file mode 100644 (file)
index 0000000..d748178
--- /dev/null
@@ -0,0 +1,108 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 18 May 2018 14:51:44 +0200
+Subject: net: sched: red: avoid hashing NULL child
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 44a63b137f7b6e4c7bd6c9cc21615941cb36509d ]
+
+Hangbin reported an Oops triggered by the syzkaller qdisc rules:
+
+ kasan: GPF could be caused by NULL-ptr deref or user memory access
+ general protection fault: 0000 [#1] SMP KASAN PTI
+ Modules linked in: sch_red
+ CPU: 0 PID: 28699 Comm: syz-executor5 Not tainted 4.17.0-rc4.kcov #1
+ Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+ RIP: 0010:qdisc_hash_add+0x26/0xa0
+ RSP: 0018:ffff8800589cf470 EFLAGS: 00010203
+ RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff824ad971
+ RDX: 0000000000000007 RSI: ffffc9000ce9f000 RDI: 000000000000003c
+ RBP: 0000000000000001 R08: ffffed000b139ea2 R09: ffff8800589cf4f0
+ R10: ffff8800589cf50f R11: ffffed000b139ea2 R12: ffff880054019fc0
+ R13: ffff880054019fb4 R14: ffff88005c0af600 R15: ffff880054019fb0
+ FS:  00007fa6edcb1700(0000) GS:ffff88005ce00000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000020000740 CR3: 000000000fc16000 CR4: 00000000000006f0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+  red_change+0x2d2/0xed0 [sch_red]
+  qdisc_create+0x57e/0xef0
+  tc_modify_qdisc+0x47f/0x14e0
+  rtnetlink_rcv_msg+0x6a8/0x920
+  netlink_rcv_skb+0x2a2/0x3c0
+  netlink_unicast+0x511/0x740
+  netlink_sendmsg+0x825/0xc30
+  sock_sendmsg+0xc5/0x100
+  ___sys_sendmsg+0x778/0x8e0
+  __sys_sendmsg+0xf5/0x1b0
+  do_syscall_64+0xbd/0x3b0
+  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ RIP: 0033:0x450869
+ RSP: 002b:00007fa6edcb0c48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+ RAX: ffffffffffffffda RBX: 00007fa6edcb16b4 RCX: 0000000000450869
+ RDX: 0000000000000000 RSI: 00000000200000c0 RDI: 0000000000000013
+ RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
+ R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
+ R13: 0000000000008778 R14: 0000000000702838 R15: 00007fa6edcb1700
+ Code: e9 0b fe ff ff 0f 1f 44 00 00 55 53 48 89 fb 89 f5 e8 3f 07 f3 fe 48 8d 7b 3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 51
+ RIP: qdisc_hash_add+0x26/0xa0 RSP: ffff8800589cf470
+
+When a red qdisc is updated with a 0 limit, the child qdisc is left
+unmodified, no additional scheduler is created in red_change(),
+the 'child' local variable is rightfully NULL and must not add it
+to the hash table.
+
+This change addresses the above issue moving qdisc_hash_add() right
+after the child qdisc creation. It additionally removes unneeded checks
+for noop_qdisc.
+
+Reported-by: Hangbin Liu <liuhangbin@gmail.com>
+Fixes: 49b499718fa1 ("net: sched: make default fifo qdiscs appear in the dump")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_red.c |    5 +++--
+ net/sched/sch_tbf.c |    5 +++--
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_red.c
++++ b/net/sched/sch_red.c
+@@ -222,10 +222,11 @@ static int red_change(struct Qdisc *sch,
+                                        extack);
+               if (IS_ERR(child))
+                       return PTR_ERR(child);
+-      }
+-      if (child != &noop_qdisc)
++              /* child is fifo, no need to check for noop_qdisc */
+               qdisc_hash_add(child, true);
++      }
++
+       sch_tree_lock(sch);
+       q->flags = ctl->flags;
+       q->limit = ctl->limit;
+--- a/net/sched/sch_tbf.c
++++ b/net/sched/sch_tbf.c
+@@ -383,6 +383,9 @@ static int tbf_change(struct Qdisc *sch,
+                       err = PTR_ERR(child);
+                       goto done;
+               }
++
++              /* child is fifo, no need to check for noop_qdisc */
++              qdisc_hash_add(child, true);
+       }
+       sch_tree_lock(sch);
+@@ -391,8 +394,6 @@ static int tbf_change(struct Qdisc *sch,
+                                         q->qdisc->qstats.backlog);
+               qdisc_destroy(q->qdisc);
+               q->qdisc = child;
+-              if (child != &noop_qdisc)
+-                      qdisc_hash_add(child, true);
+       }
+       q->limit = qopt->limit;
+       if (tb[TCA_TBF_PBURST])
diff --git a/queue-4.16/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch b/queue-4.16/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch
new file mode 100644 (file)
index 0000000..43b051c
--- /dev/null
@@ -0,0 +1,132 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Eric Biggers <ebiggers@google.com>
+Date: Sun, 13 May 2018 17:01:30 -0700
+Subject: net/smc: check for missing nlattrs in SMC_PNETID messages
+
+From: Eric Biggers <ebiggers@google.com>
+
+[ Upstream commit d49baa7e12ee70c0a7b821d088a770c94c02e494 ]
+
+It's possible to crash the kernel in several different ways by sending
+messages to the SMC_PNETID generic netlink family that are missing the
+expected attributes:
+
+- Missing SMC_PNETID_NAME => null pointer dereference when comparing
+  names.
+- Missing SMC_PNETID_ETHNAME => null pointer dereference accessing
+  smc_pnetentry::ndev.
+- Missing SMC_PNETID_IBNAME => null pointer dereference accessing
+  smc_pnetentry::smcibdev.
+- Missing SMC_PNETID_IBPORT => out of bounds array access to
+  smc_ib_device::pattr[-1].
+
+Fix it by validating that all expected attributes are present and that
+SMC_PNETID_IBPORT is nonzero.
+
+Reported-by: syzbot+5cd61039dc9b8bfa6e47@syzkaller.appspotmail.com
+Fixes: 6812baabf24d ("smc: establish pnet table management")
+Cc: <stable@vger.kernel.org> # v4.11+
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/smc/smc_pnet.c |   71 +++++++++++++++++++++++++++++------------------------
+ 1 file changed, 40 insertions(+), 31 deletions(-)
+
+--- a/net/smc/smc_pnet.c
++++ b/net/smc/smc_pnet.c
+@@ -245,40 +245,45 @@ out:
+ static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
+                              struct nlattr *tb[])
+ {
+-      char *string, *ibname = NULL;
+-      int rc = 0;
++      char *string, *ibname;
++      int rc;
+       memset(pnetelem, 0, sizeof(*pnetelem));
+       INIT_LIST_HEAD(&pnetelem->list);
+-      if (tb[SMC_PNETID_NAME]) {
+-              string = (char *)nla_data(tb[SMC_PNETID_NAME]);
+-              if (!smc_pnetid_valid(string, pnetelem->pnet_name)) {
+-                      rc = -EINVAL;
+-                      goto error;
+-              }
+-      }
+-      if (tb[SMC_PNETID_ETHNAME]) {
+-              string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
+-              pnetelem->ndev = dev_get_by_name(net, string);
+-              if (!pnetelem->ndev)
+-                      return -ENOENT;
+-      }
+-      if (tb[SMC_PNETID_IBNAME]) {
+-              ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+-              ibname = strim(ibname);
+-              pnetelem->smcibdev = smc_pnet_find_ib(ibname);
+-              if (!pnetelem->smcibdev) {
+-                      rc = -ENOENT;
+-                      goto error;
+-              }
+-      }
+-      if (tb[SMC_PNETID_IBPORT]) {
+-              pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+-              if (pnetelem->ib_port > SMC_MAX_PORTS) {
+-                      rc = -EINVAL;
+-                      goto error;
+-              }
+-      }
++
++      rc = -EINVAL;
++      if (!tb[SMC_PNETID_NAME])
++              goto error;
++      string = (char *)nla_data(tb[SMC_PNETID_NAME]);
++      if (!smc_pnetid_valid(string, pnetelem->pnet_name))
++              goto error;
++
++      rc = -EINVAL;
++      if (!tb[SMC_PNETID_ETHNAME])
++              goto error;
++      rc = -ENOENT;
++      string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
++      pnetelem->ndev = dev_get_by_name(net, string);
++      if (!pnetelem->ndev)
++              goto error;
++
++      rc = -EINVAL;
++      if (!tb[SMC_PNETID_IBNAME])
++              goto error;
++      rc = -ENOENT;
++      ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
++      ibname = strim(ibname);
++      pnetelem->smcibdev = smc_pnet_find_ib(ibname);
++      if (!pnetelem->smcibdev)
++              goto error;
++
++      rc = -EINVAL;
++      if (!tb[SMC_PNETID_IBPORT])
++              goto error;
++      pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
++      if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
++              goto error;
++
+       return 0;
+ error:
+@@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff *
+       void *hdr;
+       int rc;
++      if (!info->attrs[SMC_PNETID_NAME])
++              return -EINVAL;
+       pnetelem = smc_pnet_find_pnetid(
+                               (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
+       if (!pnetelem)
+@@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff *
+ static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
+ {
++      if (!info->attrs[SMC_PNETID_NAME])
++              return -EINVAL;
+       return smc_pnet_remove_by_pnetid(
+                               (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
+ }
diff --git a/queue-4.16/net-test-tailroom-before-appending-to-linear-skb.patch b/queue-4.16/net-test-tailroom-before-appending-to-linear-skb.patch
new file mode 100644 (file)
index 0000000..3964231
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Willem de Bruijn <willemb@google.com>
+Date: Thu, 17 May 2018 13:13:29 -0400
+Subject: net: test tailroom before appending to linear skb
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit 113f99c3358564a0647d444c2ae34e8b1abfd5b9 ]
+
+Device features may change during transmission. In particular with
+corking, a device may toggle scatter-gather in between allocating
+and writing to an skb.
+
+Do not unconditionally assume that !NETIF_F_SG at write time implies
+that the same held at alloc time and thus the skb has sufficient
+tailroom.
+
+This issue predates git history.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_output.c  |    3 ++-
+ net/ipv6/ip6_output.c |    3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -1040,7 +1040,8 @@ alloc_new_skb:
+               if (copy > length)
+                       copy = length;
+-              if (!(rt->dst.dev->features&NETIF_F_SG)) {
++              if (!(rt->dst.dev->features&NETIF_F_SG) &&
++                  skb_tailroom(skb) >= copy) {
+                       unsigned int off;
+                       off = skb->len;
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1489,7 +1489,8 @@ alloc_new_skb:
+               if (copy > length)
+                       copy = length;
+-              if (!(rt->dst.dev->features&NETIF_F_SG)) {
++              if (!(rt->dst.dev->features&NETIF_F_SG) &&
++                  skb_tailroom(skb) >= copy) {
+                       unsigned int off;
+                       off = skb->len;
diff --git a/queue-4.16/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch b/queue-4.16/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch
new file mode 100644 (file)
index 0000000..cead00a
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Willem de Bruijn <willemb@google.com>
+Date: Fri, 11 May 2018 13:24:25 -0400
+Subject: packet: in packet_snd start writing at link layer allocation
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit b84bbaf7a6c8cca24f8acf25a2c8e46913a947ba ]
+
+Packet sockets allow construction of packets shorter than
+dev->hard_header_len to accommodate protocols with variable length
+link layer headers. These packets are padded to dev->hard_header_len,
+because some device drivers interpret that as a minimum packet size.
+
+packet_snd reserves dev->hard_header_len bytes on allocation.
+SOCK_DGRAM sockets call skb_push in dev_hard_header() to ensure that
+link layer headers are stored in the reserved range. SOCK_RAW sockets
+do the same in tpacket_snd, but not in packet_snd.
+
+Syzbot was able to send a zero byte packet to a device with massive
+116B link layer header, causing padding to cross over into skb_shinfo.
+Fix this by writing from the start of the llheader reserved range also
+in the case of packet_snd/SOCK_RAW.
+
+Update skb_set_network_header to the new offset. This also corrects
+it for SOCK_DGRAM, where it incorrectly double counted reserve due to
+the skb_push in dev_hard_header.
+
+Fixes: 9ed988cd5915 ("packet: validate variable length ll headers")
+Reported-by: syzbot+71d74a5406d02057d559@syzkaller.appspotmail.com
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2903,13 +2903,15 @@ static int packet_snd(struct socket *soc
+       if (skb == NULL)
+               goto out_unlock;
+-      skb_set_network_header(skb, reserve);
++      skb_reset_network_header(skb);
+       err = -EINVAL;
+       if (sock->type == SOCK_DGRAM) {
+               offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
+               if (unlikely(offset < 0))
+                       goto out_free;
++      } else if (reserve) {
++              skb_push(skb, reserve);
+       }
+       /* Returns -EFAULT on error */
diff --git a/queue-4.16/qed-fix-ll2-race-during-connection-terminate.patch b/queue-4.16/qed-fix-ll2-race-during-connection-terminate.patch
new file mode 100644 (file)
index 0000000..b979d50
--- /dev/null
@@ -0,0 +1,90 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Michal Kalderon <Michal.Kalderon@cavium.com>
+Date: Wed, 16 May 2018 14:44:40 +0300
+Subject: qed: Fix LL2 race during connection terminate
+
+From: Michal Kalderon <Michal.Kalderon@cavium.com>
+
+[ Upstream commit 490068deaef0c76e47bf89c457de899b7d3995c7 ]
+
+Stress on qedi/qedr load unload lead to list_del corruption.
+This is due to ll2 connection terminate freeing resources without
+verifying that no more ll2 processing will occur.
+
+This patch unregisters the ll2 status block before terminating
+the connection to assure this race does not occur.
+
+Fixes: 1d6cff4fca4366 ("qed: Add iSCSI out of order packet handling")
+Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
+Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_ll2.c |   24 ++++++++++++++----------
+ 1 file changed, 14 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+@@ -842,6 +842,9 @@ static int qed_ll2_lb_rxq_completion(str
+       struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie;
+       int rc;
++      if (!QED_LL2_RX_REGISTERED(p_ll2_conn))
++              return 0;
++
+       rc = qed_ll2_lb_rxq_handler(p_hwfn, p_ll2_conn);
+       if (rc)
+               return rc;
+@@ -862,6 +865,9 @@ static int qed_ll2_lb_txq_completion(str
+       u16 new_idx = 0, num_bds = 0;
+       int rc;
++      if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
++              return 0;
++
+       new_idx = le16_to_cpu(*p_tx->p_fw_cons);
+       num_bds = ((s16)new_idx - (s16)p_tx->bds_idx);
+@@ -1915,17 +1921,25 @@ int qed_ll2_terminate_connection(void *c
+       /* Stop Tx & Rx of connection, if needed */
+       if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
++              p_ll2_conn->tx_queue.b_cb_registred = false;
++              smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */
+               rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn);
+               if (rc)
+                       goto out;
++
+               qed_ll2_txq_flush(p_hwfn, connection_handle);
++              qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
+       }
+       if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
++              p_ll2_conn->rx_queue.b_cb_registred = false;
++              smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */
+               rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn);
+               if (rc)
+                       goto out;
++
+               qed_ll2_rxq_flush(p_hwfn, connection_handle);
++              qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index);
+       }
+       if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
+@@ -1973,16 +1987,6 @@ void qed_ll2_release_connection(void *cx
+       if (!p_ll2_conn)
+               return;
+-      if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
+-              p_ll2_conn->rx_queue.b_cb_registred = false;
+-              qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index);
+-      }
+-
+-      if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
+-              p_ll2_conn->tx_queue.b_cb_registred = false;
+-              qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
+-      }
+-
+       kfree(p_ll2_conn->tx_queue.descq_mem);
+       qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain);
diff --git a/queue-4.16/qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch b/queue-4.16/qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch
new file mode 100644 (file)
index 0000000..0bff47d
--- /dev/null
@@ -0,0 +1,92 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Michal Kalderon <Michal.Kalderon@cavium.com>
+Date: Wed, 16 May 2018 14:44:39 +0300
+Subject: qed: Fix possibility of list corruption during rmmod flows
+
+From: Michal Kalderon <Michal.Kalderon@cavium.com>
+
+[ Upstream commit ffd2c0d12752a69e480366031ec7a7d723dd2510 ]
+
+The ll2 flows of flushing the txq/rxq need to be synchronized with the
+regular fp processing. Caused list corruption during load/unload stress
+tests.
+
+Fixes: 0a7fb11c23c0f ("qed: Add Light L2 support")
+Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
+Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_ll2.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+@@ -292,6 +292,7 @@ static void qed_ll2_txq_flush(struct qed
+       struct qed_ll2_tx_packet *p_pkt = NULL;
+       struct qed_ll2_info *p_ll2_conn;
+       struct qed_ll2_tx_queue *p_tx;
++      unsigned long flags = 0;
+       dma_addr_t tx_frag;
+       p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle);
+@@ -300,6 +301,7 @@ static void qed_ll2_txq_flush(struct qed
+       p_tx = &p_ll2_conn->tx_queue;
++      spin_lock_irqsave(&p_tx->lock, flags);
+       while (!list_empty(&p_tx->active_descq)) {
+               p_pkt = list_first_entry(&p_tx->active_descq,
+                                        struct qed_ll2_tx_packet, list_entry);
+@@ -309,6 +311,7 @@ static void qed_ll2_txq_flush(struct qed
+               list_del(&p_pkt->list_entry);
+               b_last_packet = list_empty(&p_tx->active_descq);
+               list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
++              spin_unlock_irqrestore(&p_tx->lock, flags);
+               if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
+                       struct qed_ooo_buffer *p_buffer;
+@@ -328,7 +331,9 @@ static void qed_ll2_txq_flush(struct qed
+                                                     b_last_frag,
+                                                     b_last_packet);
+               }
++              spin_lock_irqsave(&p_tx->lock, flags);
+       }
++      spin_unlock_irqrestore(&p_tx->lock, flags);
+ }
+ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
+@@ -556,6 +561,7 @@ static void qed_ll2_rxq_flush(struct qed
+       struct qed_ll2_info *p_ll2_conn = NULL;
+       struct qed_ll2_rx_packet *p_pkt = NULL;
+       struct qed_ll2_rx_queue *p_rx;
++      unsigned long flags = 0;
+       p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle);
+       if (!p_ll2_conn)
+@@ -563,13 +569,14 @@ static void qed_ll2_rxq_flush(struct qed
+       p_rx = &p_ll2_conn->rx_queue;
++      spin_lock_irqsave(&p_rx->lock, flags);
+       while (!list_empty(&p_rx->active_descq)) {
+               p_pkt = list_first_entry(&p_rx->active_descq,
+                                        struct qed_ll2_rx_packet, list_entry);
+               if (!p_pkt)
+                       break;
+-
+               list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
++              spin_unlock_irqrestore(&p_rx->lock, flags);
+               if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
+                       struct qed_ooo_buffer *p_buffer;
+@@ -588,7 +595,9 @@ static void qed_ll2_rxq_flush(struct qed
+                                                     cookie,
+                                                     rx_buf_addr, b_last);
+               }
++              spin_lock_irqsave(&p_rx->lock, flags);
+       }
++      spin_unlock_irqrestore(&p_rx->lock, flags);
+ }
+ static u8 qed_ll2_convert_rx_parse_to_tx_flags(u16 parse_flags)
diff --git a/queue-4.16/qed-ll2-flush-isles-when-connection-is-closed.patch b/queue-4.16/qed-ll2-flush-isles-when-connection-is-closed.patch
new file mode 100644 (file)
index 0000000..b880e1a
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Michal Kalderon <Michal.Kalderon@cavium.com>
+Date: Wed, 16 May 2018 14:44:38 +0300
+Subject: qed: LL2 flush isles when connection is closed
+
+From: Michal Kalderon <Michal.Kalderon@cavium.com>
+
+[ Upstream commit f9bcd60274a565751abef622f9018badd01a17c8 ]
+
+Driver should free all pending isles once it gets a FLUSH cqe from FW.
+Part of iSCSI out of order flow.
+
+Fixes: 1d6cff4fca4366 ("qed: Add iSCSI out of order packet handling")
+Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
+Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_ll2.c |   26 ++++++++++++++++++++++++++
+ 1 file changed, 26 insertions(+)
+
+--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+@@ -601,6 +601,27 @@ static u8 qed_ll2_convert_rx_parse_to_tx
+       return bd_flags;
+ }
++static bool
++qed_ll2_lb_rxq_handler_slowpath(struct qed_hwfn *p_hwfn,
++                              struct core_rx_slow_path_cqe *p_cqe)
++{
++      struct ooo_opaque *iscsi_ooo;
++      u32 cid;
++
++      if (p_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH)
++              return false;
++
++      iscsi_ooo = (struct ooo_opaque *)&p_cqe->opaque_data;
++      if (iscsi_ooo->ooo_opcode != TCP_EVENT_DELETE_ISLES)
++              return false;
++
++      /* Need to make a flush */
++      cid = le32_to_cpu(iscsi_ooo->cid);
++      qed_ooo_release_connection_isles(p_hwfn, p_hwfn->p_ooo_info, cid);
++
++      return true;
++}
++
+ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn,
+                                 struct qed_ll2_info *p_ll2_conn)
+ {
+@@ -627,6 +648,11 @@ static int qed_ll2_lb_rxq_handler(struct
+               cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
+               cqe_type = cqe->rx_cqe_sp.type;
++              if (cqe_type == CORE_RX_CQE_TYPE_SLOW_PATH)
++                      if (qed_ll2_lb_rxq_handler_slowpath(p_hwfn,
++                                                          &cqe->rx_cqe_sp))
++                              continue;
++
+               if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) {
+                       DP_NOTICE(p_hwfn,
+                                 "Got a non-regular LB LL2 completion [type 0x%02x]\n",
index 86dec53404fe46c5310b9ede20c3fe596ff86a5f..8f8fd865f0069c10e231632cd145aa5a9f6ee957 100644 (file)
@@ -1 +1,33 @@
 net-mlx5-fix-build-break-when-config_smp-n.patch
+net-fix-a-bug-in-removing-queues-from-xps-map.patch
+net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch
+net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch
+net-sched-red-avoid-hashing-null-child.patch
+net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch
+net-test-tailroom-before-appending-to-linear-skb.patch
+packet-in-packet_snd-start-writing-at-link-layer-allocation.patch
+sock_diag-fix-use-after-free-read-in-__sk_free.patch
+tcp-purge-write-queue-in-tcp_connect_init.patch
+tun-fix-use-after-free-for-ptr_ring.patch
+tuntap-fix-use-after-free-during-release.patch
+cxgb4-correct-ntuple-mask-validation-for-hash-filters.patch
+net-dsa-bcm_sf2-fix-rx_cls_loc_any-overwrite-for-last-rule.patch
+net-dsa-do-not-register-devlink-for-unused-ports.patch
+net-dsa-bcm_sf2-fix-ipv6-rules-and-chain-id.patch
+net-dsa-bcm_sf2-fix-ipv6-rule-half-deletion.patch
+3c59x-convert-to-generic-dma-api.patch
+cxgb4-fix-offset-in-collecting-tx-rate-limit-info.patch
+vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch
+vmxnet3-use-dma-memory-barriers-where-required.patch
+net-ip6_gre-request-headroom-in-__gre6_xmit.patch
+net-ip6_gre-fix-headroom-request-in-ip6erspan_tunnel_xmit.patch
+net-ip6_gre-split-up-ip6gre_tnl_link_config.patch
+net-ip6_gre-split-up-ip6gre_tnl_change.patch
+net-ip6_gre-split-up-ip6gre_newlink.patch
+net-ip6_gre-split-up-ip6gre_changelink.patch
+net-ip6_gre-fix-ip6erspan-hlen-calculation.patch
+net-ip6_gre-fix-tunnel-metadata-device-sharing.patch
+qed-ll2-flush-isles-when-connection-is-closed.patch
+qed-fix-possibility-of-list-corruption-during-rmmod-flows.patch
+qed-fix-ll2-race-during-connection-terminate.patch
+sparc-vio-use-put_device-instead-of-kfree.patch
diff --git a/queue-4.16/sock_diag-fix-use-after-free-read-in-__sk_free.patch b/queue-4.16/sock_diag-fix-use-after-free-read-in-__sk_free.patch
new file mode 100644 (file)
index 0000000..ded9d8d
--- /dev/null
@@ -0,0 +1,128 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 18 May 2018 04:47:55 -0700
+Subject: sock_diag: fix use-after-free read in __sk_free
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9709020c86f6bf8439ca3effc58cfca49a5de192 ]
+
+We must not call sock_diag_has_destroy_listeners(sk) on a socket
+that has no reference on net structure.
+
+BUG: KASAN: use-after-free in sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline]
+BUG: KASAN: use-after-free in __sk_free+0x329/0x340 net/core/sock.c:1609
+Read of size 8 at addr ffff88018a02e3a0 by task swapper/1/0
+
+CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.17.0-rc5+ #54
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1b9/0x294 lib/dump_stack.c:113
+ print_address_description+0x6c/0x20b mm/kasan/report.c:256
+ kasan_report_error mm/kasan/report.c:354 [inline]
+ kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
+ __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
+ sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline]
+ __sk_free+0x329/0x340 net/core/sock.c:1609
+ sk_free+0x42/0x50 net/core/sock.c:1623
+ sock_put include/net/sock.h:1664 [inline]
+ reqsk_free include/net/request_sock.h:116 [inline]
+ reqsk_put include/net/request_sock.h:124 [inline]
+ inet_csk_reqsk_queue_drop_and_put net/ipv4/inet_connection_sock.c:672 [inline]
+ reqsk_timer_handler+0xe27/0x10e0 net/ipv4/inet_connection_sock.c:739
+ call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
+ expire_timers kernel/time/timer.c:1363 [inline]
+ __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
+ run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
+ __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
+ invoke_softirq kernel/softirq.c:365 [inline]
+ irq_exit+0x1d1/0x200 kernel/softirq.c:405
+ exiting_irq arch/x86/include/asm/apic.h:525 [inline]
+ smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
+ apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
+ </IRQ>
+RIP: 0010:native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:54
+RSP: 0018:ffff8801d9ae7c38 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13
+RAX: dffffc0000000000 RBX: 1ffff1003b35cf8a RCX: 0000000000000000
+RDX: 1ffffffff11a30d0 RSI: 0000000000000001 RDI: ffffffff88d18680
+RBP: ffff8801d9ae7c38 R08: ffffed003b5e46c3 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001
+R13: ffff8801d9ae7cf0 R14: ffffffff897bef20 R15: 0000000000000000
+ arch_safe_halt arch/x86/include/asm/paravirt.h:94 [inline]
+ default_idle+0xc2/0x440 arch/x86/kernel/process.c:354
+ arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:345
+ default_idle_call+0x6d/0x90 kernel/sched/idle.c:93
+ cpuidle_idle_call kernel/sched/idle.c:153 [inline]
+ do_idle+0x395/0x560 kernel/sched/idle.c:262
+ cpu_startup_entry+0x104/0x120 kernel/sched/idle.c:368
+ start_secondary+0x426/0x5b0 arch/x86/kernel/smpboot.c:269
+ secondary_startup_64+0xa5/0xb0 arch/x86/kernel/head_64.S:242
+
+Allocated by task 4557:
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:448
+ set_track mm/kasan/kasan.c:460 [inline]
+ kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
+ kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490
+ kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554
+ kmem_cache_zalloc include/linux/slab.h:691 [inline]
+ net_alloc net/core/net_namespace.c:383 [inline]
+ copy_net_ns+0x159/0x4c0 net/core/net_namespace.c:423
+ create_new_namespaces+0x69d/0x8f0 kernel/nsproxy.c:107
+ unshare_nsproxy_namespaces+0xc3/0x1f0 kernel/nsproxy.c:206
+ ksys_unshare+0x708/0xf90 kernel/fork.c:2408
+ __do_sys_unshare kernel/fork.c:2476 [inline]
+ __se_sys_unshare kernel/fork.c:2474 [inline]
+ __x64_sys_unshare+0x31/0x40 kernel/fork.c:2474
+ do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+Freed by task 69:
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:448
+ set_track mm/kasan/kasan.c:460 [inline]
+ __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
+ kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
+ __cache_free mm/slab.c:3498 [inline]
+ kmem_cache_free+0x86/0x2d0 mm/slab.c:3756
+ net_free net/core/net_namespace.c:399 [inline]
+ net_drop_ns.part.14+0x11a/0x130 net/core/net_namespace.c:406
+ net_drop_ns net/core/net_namespace.c:405 [inline]
+ cleanup_net+0x6a1/0xb20 net/core/net_namespace.c:541
+ process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
+ worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
+ kthread+0x345/0x410 kernel/kthread.c:240
+ ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
+
+The buggy address belongs to the object at ffff88018a02c140
+ which belongs to the cache net_namespace of size 8832
+The buggy address is located 8800 bytes inside of
+ 8832-byte region [ffff88018a02c140, ffff88018a02e3c0)
+The buggy address belongs to the page:
+page:ffffea0006280b00 count:1 mapcount:0 mapping:ffff88018a02c140 index:0x0 compound_mapcount: 0
+flags: 0x2fffc0000008100(slab|head)
+raw: 02fffc0000008100 ffff88018a02c140 0000000000000000 0000000100000001
+raw: ffffea00062a1320 ffffea0006268020 ffff8801d9bdde40 0000000000000000
+page dumped because: kasan: bad access detected
+
+Fixes: b922622ec6ef ("sock_diag: don't broadcast kernel sockets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Craig Gallek <kraig@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1603,7 +1603,7 @@ static void __sk_free(struct sock *sk)
+       if (likely(sk->sk_net_refcnt))
+               sock_inuse_add(sock_net(sk), -1);
+-      if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
++      if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
+               sock_diag_broadcast_destroy(sk);
+       else
+               sk_destruct(sk);
diff --git a/queue-4.16/sparc-vio-use-put_device-instead-of-kfree.patch b/queue-4.16/sparc-vio-use-put_device-instead-of-kfree.patch
new file mode 100644 (file)
index 0000000..d8831d1
--- /dev/null
@@ -0,0 +1,31 @@
+From 00ad691ab140b54ab9f5de5e74cb994f552e8124 Mon Sep 17 00:00:00 2001
+From: Arvind Yadav <arvind.yadav.cs@gmail.com>
+Date: Wed, 25 Apr 2018 20:26:14 +0530
+Subject: sparc: vio: use put_device() instead of kfree()
+
+From: Arvind Yadav <arvind.yadav.cs@gmail.com>
+
+[ Upstream commit 00ad691ab140b54ab9f5de5e74cb994f552e8124 ]
+
+Never directly free @dev after calling device_register(), even
+if it returned an error. Always use put_device() to give up the
+reference initialized.
+
+Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/vio.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/sparc/kernel/vio.c
++++ b/arch/sparc/kernel/vio.c
+@@ -403,7 +403,7 @@ static struct vio_dev *vio_create_one(st
+       if (err) {
+               printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
+                      dev_name(&vdev->dev), err);
+-              kfree(vdev);
++              put_device(&vdev->dev);
+               return NULL;
+       }
+       if (vdev->dp)
diff --git a/queue-4.16/tcp-purge-write-queue-in-tcp_connect_init.patch b/queue-4.16/tcp-purge-write-queue-in-tcp_connect_init.patch
new file mode 100644 (file)
index 0000000..52cc1e1
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 14 May 2018 21:14:26 -0700
+Subject: tcp: purge write queue in tcp_connect_init()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7f582b248d0a86bae5788c548d7bb5bca6f7691a ]
+
+syzkaller found a reliable way to crash the host, hitting a BUG()
+in __tcp_retransmit_skb()
+
+Malicous MSG_FASTOPEN is the root cause. We need to purge write queue
+in tcp_connect_init() at the point we init snd_una/write_seq.
+
+This patch also replaces the BUG() by a less intrusive WARN_ON_ONCE()
+
+kernel BUG at net/ipv4/tcp_output.c:2837!
+invalid opcode: 0000 [#1] SMP KASAN
+Dumping ftrace buffer:
+   (ftrace buffer empty)
+Modules linked in:
+CPU: 0 PID: 5276 Comm: syz-executor0 Not tainted 4.17.0-rc3+ #51
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:__tcp_retransmit_skb+0x2992/0x2eb0 net/ipv4/tcp_output.c:2837
+RSP: 0000:ffff8801dae06ff8 EFLAGS: 00010206
+RAX: ffff8801b9fe61c0 RBX: 00000000ffc18a16 RCX: ffffffff864e1a49
+RDX: 0000000000000100 RSI: ffffffff864e2e12 RDI: 0000000000000005
+RBP: ffff8801dae073a0 R08: ffff8801b9fe61c0 R09: ffffed0039c40dd2
+R10: ffffed0039c40dd2 R11: ffff8801ce206e93 R12: 00000000421eeaad
+R13: ffff8801ce206d4e R14: ffff8801ce206cc0 R15: ffff8801cd4f4a80
+FS:  0000000000000000(0000) GS:ffff8801dae00000(0063) knlGS:00000000096bc900
+CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
+CR2: 0000000020000000 CR3: 00000001c47b6000 CR4: 00000000001406f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <IRQ>
+ tcp_retransmit_skb+0x2e/0x250 net/ipv4/tcp_output.c:2923
+ tcp_retransmit_timer+0xc50/0x3060 net/ipv4/tcp_timer.c:488
+ tcp_write_timer_handler+0x339/0x960 net/ipv4/tcp_timer.c:573
+ tcp_write_timer+0x111/0x1d0 net/ipv4/tcp_timer.c:593
+ call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
+ expire_timers kernel/time/timer.c:1363 [inline]
+ __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
+ run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
+ __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
+ invoke_softirq kernel/softirq.c:365 [inline]
+ irq_exit+0x1d1/0x200 kernel/softirq.c:405
+ exiting_irq arch/x86/include/asm/apic.h:525 [inline]
+ smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
+ apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
+
+Fixes: cf60af03ca4e ("net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2860,8 +2860,10 @@ int __tcp_retransmit_skb(struct sock *sk
+               return -EBUSY;
+       if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
+-              if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+-                      BUG();
++              if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
++                      WARN_ON_ONCE(1);
++                      return -EINVAL;
++              }
+               if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
+                       return -ENOMEM;
+       }
+@@ -3369,6 +3371,7 @@ static void tcp_connect_init(struct sock
+       sock_reset_flag(sk, SOCK_DONE);
+       tp->snd_wnd = 0;
+       tcp_init_wl(tp, 0);
++      tcp_write_queue_purge(sk);
+       tp->snd_una = tp->write_seq;
+       tp->snd_sml = tp->write_seq;
+       tp->snd_up = tp->write_seq;
diff --git a/queue-4.16/tun-fix-use-after-free-for-ptr_ring.patch b/queue-4.16/tun-fix-use-after-free-for-ptr_ring.patch
new file mode 100644 (file)
index 0000000..dafcbea
--- /dev/null
@@ -0,0 +1,116 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Jason Wang <jasowang@redhat.com>
+Date: Fri, 11 May 2018 10:49:25 +0800
+Subject: tun: fix use after free for ptr_ring
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit b196d88aba8ac72b775137854121097f4c4c6862 ]
+
+We used to initialize ptr_ring during TUNSETIFF, this is because its
+size depends on the tx_queue_len of netdevice. And we try to clean it
+up when socket were detached from netdevice. A race were spotted when
+trying to do uninit during a read which will lead a use after free for
+pointer ring. Solving this by always initialize a zero size ptr_ring
+in open() and do resizing during TUNSETIFF, and then we can safely do
+cleanup during close(). With this, there's no need for the workaround
+that was introduced by commit 4df0bfc79904 ("tun: fix a memory leak
+for tfile->tx_array").
+
+Reported-by: syzbot+e8b902c3c3fadf0a9dba@syzkaller.appspotmail.com
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Fixes: 1576d9860599 ("tun: switch to use skb array for tx")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |   27 ++++++++++++---------------
+ 1 file changed, 12 insertions(+), 15 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -680,15 +680,6 @@ static void tun_queue_purge(struct tun_f
+       skb_queue_purge(&tfile->sk.sk_error_queue);
+ }
+-static void tun_cleanup_tx_ring(struct tun_file *tfile)
+-{
+-      if (tfile->tx_ring.queue) {
+-              ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
+-              xdp_rxq_info_unreg(&tfile->xdp_rxq);
+-              memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
+-      }
+-}
+-
+ static void __tun_detach(struct tun_file *tfile, bool clean)
+ {
+       struct tun_file *ntfile;
+@@ -735,7 +726,8 @@ static void __tun_detach(struct tun_file
+                           tun->dev->reg_state == NETREG_REGISTERED)
+                               unregister_netdevice(tun->dev);
+               }
+-              tun_cleanup_tx_ring(tfile);
++              if (tun)
++                      xdp_rxq_info_unreg(&tfile->xdp_rxq);
+               sock_put(&tfile->sk);
+       }
+ }
+@@ -775,14 +767,14 @@ static void tun_detach_all(struct net_de
+               tun_napi_del(tun, tfile);
+               /* Drop read queue */
+               tun_queue_purge(tfile);
++              xdp_rxq_info_unreg(&tfile->xdp_rxq);
+               sock_put(&tfile->sk);
+-              tun_cleanup_tx_ring(tfile);
+       }
+       list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
+               tun_enable_queue(tfile);
+               tun_queue_purge(tfile);
++              xdp_rxq_info_unreg(&tfile->xdp_rxq);
+               sock_put(&tfile->sk);
+-              tun_cleanup_tx_ring(tfile);
+       }
+       BUG_ON(tun->numdisabled != 0);
+@@ -826,7 +818,8 @@ static int tun_attach(struct tun_struct
+       }
+       if (!tfile->detached &&
+-          ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) {
++          ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len,
++                          GFP_KERNEL, tun_ptr_free)) {
+               err = -ENOMEM;
+               goto out;
+       }
+@@ -3131,6 +3124,11 @@ static int tun_chr_open(struct inode *in
+                                           &tun_proto, 0);
+       if (!tfile)
+               return -ENOMEM;
++      if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) {
++              sk_free(&tfile->sk);
++              return -ENOMEM;
++      }
++
+       RCU_INIT_POINTER(tfile->tun, NULL);
+       tfile->flags = 0;
+       tfile->ifindex = 0;
+@@ -3151,8 +3149,6 @@ static int tun_chr_open(struct inode *in
+       sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
+-      memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
+-
+       return 0;
+ }
+@@ -3161,6 +3157,7 @@ static int tun_chr_close(struct inode *i
+       struct tun_file *tfile = file->private_data;
+       tun_detach(tfile, true);
++      ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
+       return 0;
+ }
diff --git a/queue-4.16/tuntap-fix-use-after-free-during-release.patch b/queue-4.16/tuntap-fix-use-after-free-during-release.patch
new file mode 100644 (file)
index 0000000..7cffdcc
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: Jason Wang <jasowang@redhat.com>
+Date: Wed, 16 May 2018 20:39:33 +0800
+Subject: tuntap: fix use after free during release
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 7063efd33bb15abc0160347f89eb5aba6b7d000e ]
+
+After commit b196d88aba8a ("tun: fix use after free for ptr_ring") we
+need clean up tx ring during release(). But unfortunately, it tries to
+do the cleanup blindly after socket were destroyed which will lead
+another use-after-free. Fix this by doing the cleanup before dropping
+the last reference of the socket in __tun_detach().
+
+Reported-by: Andrei Vagin <avagin@virtuozzo.com>
+Acked-by: Andrei Vagin <avagin@virtuozzo.com>
+Fixes: b196d88aba8a ("tun: fix use after free for ptr_ring")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -728,6 +728,7 @@ static void __tun_detach(struct tun_file
+               }
+               if (tun)
+                       xdp_rxq_info_unreg(&tfile->xdp_rxq);
++              ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
+               sock_put(&tfile->sk);
+       }
+ }
+@@ -3157,7 +3158,6 @@ static int tun_chr_close(struct inode *i
+       struct tun_file *tfile = file->private_data;
+       tun_detach(tfile, true);
+-      ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
+       return 0;
+ }
diff --git a/queue-4.16/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch b/queue-4.16/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch
new file mode 100644 (file)
index 0000000..bd6ab6b
--- /dev/null
@@ -0,0 +1,153 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+Date: Mon, 14 May 2018 08:14:34 -0400
+Subject: vmxnet3: set the DMA mask before the first DMA map operation
+
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+
+[ Upstream commit 61aeecea40afb2b89933e27cd4adb10fc2e75cfd ]
+
+The DMA mask must be set before, not after, the first DMA map operation, or
+the first DMA map operation could in theory fail on some systems.
+
+Fixes: b0eb57cb97e78 ("VMXNET3: Add support for virtual IOMMU")
+Signed-off-by: Regis Duchesne <hpreg@vmware.com>
+Acked-by: Ronak Doshi <doshir@vmware.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c |   50 +++++++++++++++++++-------------------
+ drivers/net/vmxnet3/vmxnet3_int.h |    8 +++---
+ 2 files changed, 30 insertions(+), 28 deletions(-)
+
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -2688,7 +2688,7 @@ vmxnet3_set_mac_addr(struct net_device *
+ /* ==================== initialization and cleanup routines ============ */
+ static int
+-vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
++vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter)
+ {
+       int err;
+       unsigned long mmio_start, mmio_len;
+@@ -2700,30 +2700,12 @@ vmxnet3_alloc_pci_resources(struct vmxne
+               return err;
+       }
+-      if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
+-              if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
+-                      dev_err(&pdev->dev,
+-                              "pci_set_consistent_dma_mask failed\n");
+-                      err = -EIO;
+-                      goto err_set_mask;
+-              }
+-              *dma64 = true;
+-      } else {
+-              if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
+-                      dev_err(&pdev->dev,
+-                              "pci_set_dma_mask failed\n");
+-                      err = -EIO;
+-                      goto err_set_mask;
+-              }
+-              *dma64 = false;
+-      }
+-
+       err = pci_request_selected_regions(pdev, (1 << 2) - 1,
+                                          vmxnet3_driver_name);
+       if (err) {
+               dev_err(&pdev->dev,
+                       "Failed to request region for adapter: error %d\n", err);
+-              goto err_set_mask;
++              goto err_enable_device;
+       }
+       pci_set_master(pdev);
+@@ -2751,7 +2733,7 @@ err_bar1:
+       iounmap(adapter->hw_addr0);
+ err_ioremap:
+       pci_release_selected_regions(pdev, (1 << 2) - 1);
+-err_set_mask:
++err_enable_device:
+       pci_disable_device(pdev);
+       return err;
+ }
+@@ -3254,7 +3236,7 @@ vmxnet3_probe_device(struct pci_dev *pde
+ #endif
+       };
+       int err;
+-      bool dma64 = false; /* stupid gcc */
++      bool dma64;
+       u32 ver;
+       struct net_device *netdev;
+       struct vmxnet3_adapter *adapter;
+@@ -3300,6 +3282,24 @@ vmxnet3_probe_device(struct pci_dev *pde
+       adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
+       adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
++      if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
++              if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
++                      dev_err(&pdev->dev,
++                              "pci_set_consistent_dma_mask failed\n");
++                      err = -EIO;
++                      goto err_set_mask;
++              }
++              dma64 = true;
++      } else {
++              if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
++                      dev_err(&pdev->dev,
++                              "pci_set_dma_mask failed\n");
++                      err = -EIO;
++                      goto err_set_mask;
++              }
++              dma64 = false;
++      }
++
+       spin_lock_init(&adapter->cmd_lock);
+       adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
+                                            sizeof(struct vmxnet3_adapter),
+@@ -3307,7 +3307,7 @@ vmxnet3_probe_device(struct pci_dev *pde
+       if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
+               dev_err(&pdev->dev, "Failed to map dma\n");
+               err = -EFAULT;
+-              goto err_dma_map;
++              goto err_set_mask;
+       }
+       adapter->shared = dma_alloc_coherent(
+                               &adapter->pdev->dev,
+@@ -3358,7 +3358,7 @@ vmxnet3_probe_device(struct pci_dev *pde
+       }
+ #endif /* VMXNET3_RSS */
+-      err = vmxnet3_alloc_pci_resources(adapter, &dma64);
++      err = vmxnet3_alloc_pci_resources(adapter);
+       if (err < 0)
+               goto err_alloc_pci;
+@@ -3504,7 +3504,7 @@ err_alloc_queue_desc:
+ err_alloc_shared:
+       dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
+                        sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
+-err_dma_map:
++err_set_mask:
+       free_netdev(netdev);
+       return err;
+ }
+--- a/drivers/net/vmxnet3/vmxnet3_int.h
++++ b/drivers/net/vmxnet3/vmxnet3_int.h
+@@ -69,10 +69,12 @@
+ /*
+  * Version numbers
+  */
+-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.14.0-k"
++#define VMXNET3_DRIVER_VERSION_STRING   "1.4.15.0-k"
+-/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
+-#define VMXNET3_DRIVER_VERSION_NUM      0x01040e00
++/* Each byte of this 32-bit integer encodes a version number in
++ * VMXNET3_DRIVER_VERSION_STRING.
++ */
++#define VMXNET3_DRIVER_VERSION_NUM      0x01040f00
+ #if defined(CONFIG_PCI_MSI)
+       /* RSS only makes sense if MSI-X is supported. */
diff --git a/queue-4.16/vmxnet3-use-dma-memory-barriers-where-required.patch b/queue-4.16/vmxnet3-use-dma-memory-barriers-where-required.patch
new file mode 100644 (file)
index 0000000..b039e40
--- /dev/null
@@ -0,0 +1,91 @@
+From foo@baz Tue May 22 20:10:23 CEST 2018
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+Date: Mon, 14 May 2018 08:14:49 -0400
+Subject: vmxnet3: use DMA memory barriers where required
+
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+
+[ Upstream commit f3002c1374fb2367c9d8dbb28852791ef90d2bac ]
+
+The gen bits must be read first from (resp. written last to) DMA memory.
+The proper way to enforce this on Linux is to call dma_rmb() (resp.
+dma_wmb()).
+
+Signed-off-by: Regis Duchesne <hpreg@vmware.com>
+Acked-by: Ronak Doshi <doshir@vmware.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c |   22 ++++++++++++++++++++++
+ drivers/net/vmxnet3/vmxnet3_int.h |    4 ++--
+ 2 files changed, 24 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx
+       gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
+       while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
++              /* Prevent any &gdesc->tcd field from being (speculatively)
++               * read before (&gdesc->tcd)->gen is read.
++               */
++              dma_rmb();
++
+               completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
+                                              &gdesc->tcd), tq, adapter->pdev,
+                                              adapter);
+@@ -1103,6 +1108,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, str
+               gdesc->txd.tci = skb_vlan_tag_get(skb);
+       }
++      /* Ensure that the write to (&gdesc->txd)->gen will be observed after
++       * all other writes to &gdesc->txd.
++       */
++      dma_wmb();
++
+       /* finally flips the GEN bit of the SOP desc. */
+       gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
+                                                 VMXNET3_TXD_GEN);
+@@ -1298,6 +1308,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx
+                        */
+                       break;
+               }
++
++              /* Prevent any rcd field from being (speculatively) read before
++               * rcd->gen is read.
++               */
++              dma_rmb();
++
+               BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 &&
+                      rcd->rqID != rq->dataRingQid);
+               idx = rcd->rxdIdx;
+@@ -1528,6 +1544,12 @@ rcd_done:
+               ring->next2comp = idx;
+               num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
+               ring = rq->rx_ring + ring_idx;
++
++              /* Ensure that the writes to rxd->gen bits will be observed
++               * after all other writes to rxd objects.
++               */
++              dma_wmb();
++
+               while (num_to_alloc) {
+                       vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
+                                         &rxCmdDesc);
+--- a/drivers/net/vmxnet3/vmxnet3_int.h
++++ b/drivers/net/vmxnet3/vmxnet3_int.h
+@@ -69,12 +69,12 @@
+ /*
+  * Version numbers
+  */
+-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.15.0-k"
++#define VMXNET3_DRIVER_VERSION_STRING   "1.4.16.0-k"
+ /* Each byte of this 32-bit integer encodes a version number in
+  * VMXNET3_DRIVER_VERSION_STRING.
+  */
+-#define VMXNET3_DRIVER_VERSION_NUM      0x01040f00
++#define VMXNET3_DRIVER_VERSION_NUM      0x01041000
+ #if defined(CONFIG_PCI_MSI)
+       /* RSS only makes sense if MSI-X is supported. */