]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 Mar 2017 16:24:37 +0000 (18:24 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 Mar 2017 16:24:37 +0000 (18:24 +0200)
added patches:
amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch
ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch
net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch
net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch
net-mlx5-increase-number-of-max-qps-in-default-profile.patch
net-mlx5e-count-lro-packets-correctly.patch
net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch
net-properly-release-sk_frag.page.patch
net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch
socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch
tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch

12 files changed:
queue-4.4/amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch [new file with mode: 0644]
queue-4.4/ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch [new file with mode: 0644]
queue-4.4/net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch [new file with mode: 0644]
queue-4.4/net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch [new file with mode: 0644]
queue-4.4/net-mlx5-increase-number-of-max-qps-in-default-profile.patch [new file with mode: 0644]
queue-4.4/net-mlx5e-count-lro-packets-correctly.patch [new file with mode: 0644]
queue-4.4/net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch [new file with mode: 0644]
queue-4.4/net-properly-release-sk_frag.page.patch [new file with mode: 0644]
queue-4.4/net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch [new file with mode: 0644]
queue-4.4/series [new file with mode: 0644]
queue-4.4/socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch [new file with mode: 0644]
queue-4.4/tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch [new file with mode: 0644]

diff --git a/queue-4.4/amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch b/queue-4.4/amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch
new file mode 100644 (file)
index 0000000..26746da
--- /dev/null
@@ -0,0 +1,284 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
+Date: Wed, 15 Mar 2017 15:11:23 -0500
+Subject: amd-xgbe: Fix jumbo MTU processing on newer hardware
+
+From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
+
+
+[ Upstream commit 622c36f143fc9566ba49d7cec994c2da1182d9e2 ]
+
+Newer hardware does not provide a cumulative payload length when multiple
+descriptors are needed to handle the data. Once the MTU increases beyond
+the size that can be handled by a single descriptor, the SKB does not get
+built properly by the driver.
+
+The driver will now calculate the size of the data buffers used by the
+hardware.  The first buffer of the first descriptor is for packet headers
+or packet headers and data when the headers can't be split. Subsequent
+descriptors in a multi-descriptor chain will not use the first buffer. The
+second buffer is used by all the descriptors in the chain for payload data.
+Based on whether the driver is processing the first, intermediate, or last
+descriptor it can calculate the buffer usage and build the SKB properly.
+
+Tested and verified on both old and new hardware.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-common.h |    6 +
+ drivers/net/ethernet/amd/xgbe/xgbe-dev.c    |   20 +++--
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c    |  102 +++++++++++++++++-----------
+ 3 files changed, 78 insertions(+), 50 deletions(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+@@ -913,8 +913,8 @@
+ #define RX_PACKET_ATTRIBUTES_CSUM_DONE_WIDTH  1
+ #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX  1
+ #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH  1
+-#define RX_PACKET_ATTRIBUTES_INCOMPLETE_INDEX 2
+-#define RX_PACKET_ATTRIBUTES_INCOMPLETE_WIDTH 1
++#define RX_PACKET_ATTRIBUTES_LAST_INDEX               2
++#define RX_PACKET_ATTRIBUTES_LAST_WIDTH               1
+ #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_INDEX       3
+ #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_WIDTH       1
+ #define RX_PACKET_ATTRIBUTES_CONTEXT_INDEX    4
+@@ -923,6 +923,8 @@
+ #define RX_PACKET_ATTRIBUTES_RX_TSTAMP_WIDTH  1
+ #define RX_PACKET_ATTRIBUTES_RSS_HASH_INDEX   6
+ #define RX_PACKET_ATTRIBUTES_RSS_HASH_WIDTH   1
++#define RX_PACKET_ATTRIBUTES_FIRST_INDEX      7
++#define RX_PACKET_ATTRIBUTES_FIRST_WIDTH      1
+ #define RX_NORMAL_DESC0_OVT_INDEX             0
+ #define RX_NORMAL_DESC0_OVT_WIDTH             16
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+@@ -1658,10 +1658,15 @@ static int xgbe_dev_read(struct xgbe_cha
+       /* Get the header length */
+       if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) {
++              XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
++                             FIRST, 1);
+               rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2,
+                                                     RX_NORMAL_DESC2, HL);
+               if (rdata->rx.hdr_len)
+                       pdata->ext_stats.rx_split_header_packets++;
++      } else {
++              XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
++                             FIRST, 0);
+       }
+       /* Get the RSS hash */
+@@ -1684,19 +1689,16 @@ static int xgbe_dev_read(struct xgbe_cha
+               }
+       }
+-      /* Get the packet length */
+-      rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
+-
+-      if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) {
+-              /* Not all the data has been transferred for this packet */
+-              XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+-                             INCOMPLETE, 1);
++      /* Not all the data has been transferred for this packet */
++      if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD))
+               return 0;
+-      }
+       /* This is the last of the data for this packet */
+       XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+-                     INCOMPLETE, 0);
++                     LAST, 1);
++
++      /* Get the packet length */
++      rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
+       /* Set checksum done indicator as appropriate */
+       if (netdev->features & NETIF_F_RXCSUM)
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -1760,13 +1760,12 @@ static struct sk_buff *xgbe_create_skb(s
+ {
+       struct sk_buff *skb;
+       u8 *packet;
+-      unsigned int copy_len;
+       skb = napi_alloc_skb(napi, rdata->rx.hdr.dma_len);
+       if (!skb)
+               return NULL;
+-      /* Start with the header buffer which may contain just the header
++      /* Pull in the header buffer which may contain just the header
+        * or the header plus data
+        */
+       dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.hdr.dma_base,
+@@ -1775,30 +1774,49 @@ static struct sk_buff *xgbe_create_skb(s
+       packet = page_address(rdata->rx.hdr.pa.pages) +
+                rdata->rx.hdr.pa.pages_offset;
+-      copy_len = (rdata->rx.hdr_len) ? rdata->rx.hdr_len : len;
+-      copy_len = min(rdata->rx.hdr.dma_len, copy_len);
+-      skb_copy_to_linear_data(skb, packet, copy_len);
+-      skb_put(skb, copy_len);
+-
+-      len -= copy_len;
+-      if (len) {
+-              /* Add the remaining data as a frag */
+-              dma_sync_single_range_for_cpu(pdata->dev,
+-                                            rdata->rx.buf.dma_base,
+-                                            rdata->rx.buf.dma_off,
+-                                            rdata->rx.buf.dma_len,
+-                                            DMA_FROM_DEVICE);
+-
+-              skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+-                              rdata->rx.buf.pa.pages,
+-                              rdata->rx.buf.pa.pages_offset,
+-                              len, rdata->rx.buf.dma_len);
+-              rdata->rx.buf.pa.pages = NULL;
+-      }
++      skb_copy_to_linear_data(skb, packet, len);
++      skb_put(skb, len);
+       return skb;
+ }
++static unsigned int xgbe_rx_buf1_len(struct xgbe_ring_data *rdata,
++                                   struct xgbe_packet_data *packet)
++{
++      /* Always zero if not the first descriptor */
++      if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST))
++              return 0;
++
++      /* First descriptor with split header, return header length */
++      if (rdata->rx.hdr_len)
++              return rdata->rx.hdr_len;
++
++      /* First descriptor but not the last descriptor and no split header,
++       * so the full buffer was used
++       */
++      if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST))
++              return rdata->rx.hdr.dma_len;
++
++      /* First descriptor and last descriptor and no split header, so
++       * calculate how much of the buffer was used
++       */
++      return min_t(unsigned int, rdata->rx.hdr.dma_len, rdata->rx.len);
++}
++
++static unsigned int xgbe_rx_buf2_len(struct xgbe_ring_data *rdata,
++                                   struct xgbe_packet_data *packet,
++                                   unsigned int len)
++{
++      /* Always the full buffer if not the last descriptor */
++      if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST))
++              return rdata->rx.buf.dma_len;
++
++      /* Last descriptor so calculate how much of the buffer was used
++       * for the last bit of data
++       */
++      return rdata->rx.len - len;
++}
++
+ static int xgbe_tx_poll(struct xgbe_channel *channel)
+ {
+       struct xgbe_prv_data *pdata = channel->pdata;
+@@ -1881,8 +1899,8 @@ static int xgbe_rx_poll(struct xgbe_chan
+       struct napi_struct *napi;
+       struct sk_buff *skb;
+       struct skb_shared_hwtstamps *hwtstamps;
+-      unsigned int incomplete, error, context_next, context;
+-      unsigned int len, rdesc_len, max_len;
++      unsigned int last, error, context_next, context;
++      unsigned int len, buf1_len, buf2_len, max_len;
+       unsigned int received = 0;
+       int packet_count = 0;
+@@ -1892,7 +1910,7 @@ static int xgbe_rx_poll(struct xgbe_chan
+       if (!ring)
+               return 0;
+-      incomplete = 0;
++      last = 0;
+       context_next = 0;
+       napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
+@@ -1926,9 +1944,8 @@ read_again:
+               received++;
+               ring->cur++;
+-              incomplete = XGMAC_GET_BITS(packet->attributes,
+-                                          RX_PACKET_ATTRIBUTES,
+-                                          INCOMPLETE);
++              last = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
++                                    LAST);
+               context_next = XGMAC_GET_BITS(packet->attributes,
+                                             RX_PACKET_ATTRIBUTES,
+                                             CONTEXT_NEXT);
+@@ -1937,7 +1954,7 @@ read_again:
+                                        CONTEXT);
+               /* Earlier error, just drain the remaining data */
+-              if ((incomplete || context_next) && error)
++              if ((!last || context_next) && error)
+                       goto read_again;
+               if (error || packet->errors) {
+@@ -1949,16 +1966,22 @@ read_again:
+               }
+               if (!context) {
+-                      /* Length is cumulative, get this descriptor's length */
+-                      rdesc_len = rdata->rx.len - len;
+-                      len += rdesc_len;
++                      /* Get the data length in the descriptor buffers */
++                      buf1_len = xgbe_rx_buf1_len(rdata, packet);
++                      len += buf1_len;
++                      buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
++                      len += buf2_len;
+-                      if (rdesc_len && !skb) {
++                      if (!skb) {
+                               skb = xgbe_create_skb(pdata, napi, rdata,
+-                                                    rdesc_len);
+-                              if (!skb)
++                                                    buf1_len);
++                              if (!skb) {
+                                       error = 1;
+-                      } else if (rdesc_len) {
++                                      goto skip_data;
++                              }
++                      }
++
++                      if (buf2_len) {
+                               dma_sync_single_range_for_cpu(pdata->dev,
+                                                       rdata->rx.buf.dma_base,
+                                                       rdata->rx.buf.dma_off,
+@@ -1968,13 +1991,14 @@ read_again:
+                               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+                                               rdata->rx.buf.pa.pages,
+                                               rdata->rx.buf.pa.pages_offset,
+-                                              rdesc_len,
++                                              buf2_len,
+                                               rdata->rx.buf.dma_len);
+                               rdata->rx.buf.pa.pages = NULL;
+                       }
+               }
+-              if (incomplete || context_next)
++skip_data:
++              if (!last || context_next)
+                       goto read_again;
+               if (!skb)
+@@ -2033,7 +2057,7 @@ next_packet:
+       }
+       /* Check if we need to save state before leaving */
+-      if (received && (incomplete || context_next)) {
++      if (received && (!last || context_next)) {
+               rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
+               rdata->state_saved = 1;
+               rdata->state.skb = skb;
diff --git a/queue-4.4/ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch b/queue-4.4/ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch
new file mode 100644 (file)
index 0000000..0f1b94c
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 21 Mar 2017 19:22:28 -0700
+Subject: ipv4: provide stronger user input validation in nl_fib_input()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c64c0b3cac4c5b8cb093727d2c19743ea3965c0b ]
+
+Alexander reported a KMSAN splat caused by reads of uninitialized
+field (tb_id_in) from user provided struct fib_result_nl
+
+It turns out the nl_fib_input() sanity test on user input is a bit
+wrong:
+
+User can pretend nlh->nlmsg_len is big enough, but provide
+at sendmsg() time a too small buffer.
+
+Reported-by: Alexander Potapenko <glider@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -1080,7 +1080,8 @@ static void nl_fib_input(struct sk_buff
+       net = sock_net(skb->sk);
+       nlh = nlmsg_hdr(skb);
+-      if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
++      if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
++          skb->len < nlh->nlmsg_len ||
+           nlmsg_len(nlh) < sizeof(*frn))
+               return;
diff --git a/queue-4.4/net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch b/queue-4.4/net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch
new file mode 100644 (file)
index 0000000..b2838a5
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Wed, 15 Mar 2017 12:57:21 -0700
+Subject: net: bcmgenet: Do not suspend PHY if Wake-on-LAN is enabled
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 5371bbf4b295eea334ed453efa286afa2c3ccff3 ]
+
+Suspending the PHY would be putting it in a low power state where it
+may no longer allow us to do Wake-on-LAN.
+
+Fixes: cc013fb48898 ("net: bcmgenet: correctly suspend and resume PHY device")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -3495,7 +3495,8 @@ static int bcmgenet_suspend(struct devic
+       bcmgenet_netif_stop(dev);
+-      phy_suspend(priv->phydev);
++      if (!device_may_wakeup(d))
++              phy_suspend(priv->phydev);
+       netif_device_detach(dev);
+@@ -3592,7 +3593,8 @@ static int bcmgenet_resume(struct device
+       netif_device_attach(dev);
+-      phy_resume(priv->phydev);
++      if (!device_may_wakeup(d))
++              phy_resume(priv->phydev);
+       if (priv->eee.eee_enabled)
+               bcmgenet_eee_enable_set(dev, true);
diff --git a/queue-4.4/net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch b/queue-4.4/net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch
new file mode 100644 (file)
index 0000000..cf7758d
--- /dev/null
@@ -0,0 +1,85 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: Doug Berger <opendmb@gmail.com>
+Date: Tue, 21 Mar 2017 14:01:06 -0700
+Subject: net: bcmgenet: remove bcmgenet_internal_phy_setup()
+
+From: Doug Berger <opendmb@gmail.com>
+
+
+[ Upstream commit 31739eae738ccbe8b9d627c3f2251017ca03f4d2 ]
+
+Commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset")
+removed the bcmgenet_mii_reset() function from bcmgenet_power_up() and
+bcmgenet_internal_phy_setup() functions.  In so doing it broke the reset
+of the internal PHY devices used by the GENETv1-GENETv3 which required
+this reset before the UniMAC was enabled.  It also broke the internal
+GPHY devices used by the GENETv4 because the config_init that installed
+the AFE workaround was no longer occurring after the reset of the GPHY
+performed by bcmgenet_phy_power_set() in bcmgenet_internal_phy_setup().
+In addition the code in bcmgenet_internal_phy_setup() related to the
+"enable APD" comment goes with the bcmgenet_mii_reset() so it should
+have also been removed.
+
+Commit bd4060a6108b ("net: bcmgenet: Power on integrated GPHY in
+bcmgenet_power_up()") moved the bcmgenet_phy_power_set() call to the
+bcmgenet_power_up() function, but failed to remove it from the
+bcmgenet_internal_phy_setup() function.  Had it done so, the
+bcmgenet_internal_phy_setup() function would have been empty and could
+have been removed at that time.
+
+Commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on")
+was submitted to correct the functional problems introduced by
+commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset"). It
+was included in v4.4 and made available on 4.3-stable. Unfortunately,
+it didn't fully revert the commit because this bcmgenet_mii_reset()
+doesn't apply the soft reset to the internal GPHY used by GENETv4 like
+the previous one did. This prevents the restoration of the AFE work-
+arounds for internal GPHY devices after the bcmgenet_phy_power_set() in
+bcmgenet_internal_phy_setup().
+
+This commit takes the alternate approach of removing the unnecessary
+bcmgenet_internal_phy_setup() function which shouldn't have been in v4.3
+so that when bcmgenet_mii_reset() was restored it should have only gone
+into bcmgenet_power_up().  This will avoid the problems while also
+removing the redundancy (and hopefully some of the confusion).
+
+Fixes: 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmmii.c |   15 ---------------
+ 1 file changed, 15 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
+@@ -220,20 +220,6 @@ void bcmgenet_phy_power_set(struct net_d
+       udelay(60);
+ }
+-static void bcmgenet_internal_phy_setup(struct net_device *dev)
+-{
+-      struct bcmgenet_priv *priv = netdev_priv(dev);
+-      u32 reg;
+-
+-      /* Power up PHY */
+-      bcmgenet_phy_power_set(dev, true);
+-      /* enable APD */
+-      reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
+-      reg |= EXT_PWR_DN_EN_LD;
+-      bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
+-      bcmgenet_mii_reset(dev);
+-}
+-
+ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
+ {
+       u32 reg;
+@@ -281,7 +267,6 @@ int bcmgenet_mii_config(struct net_devic
+               if (priv->internal_phy) {
+                       phy_name = "internal PHY";
+-                      bcmgenet_internal_phy_setup(dev);
+               } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
+                       phy_name = "MoCA";
+                       bcmgenet_moca_phy_setup(priv);
diff --git a/queue-4.4/net-mlx5-increase-number-of-max-qps-in-default-profile.patch b/queue-4.4/net-mlx5-increase-number-of-max-qps-in-default-profile.patch
new file mode 100644 (file)
index 0000000..27e345d
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: Maor Gottlieb <maorg@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:17 +0200
+Subject: net/mlx5: Increase number of max QPs in default profile
+
+From: Maor Gottlieb <maorg@mellanox.com>
+
+
+[ Upstream commit 5f40b4ed975c26016cf41953b7510fe90718e21c ]
+
+With ConnectX-4 sharing SRQs from the same space as QPs, we hit a limit
+that prevents some applications from allocating the number of QPs they need.
+Double the size to 256K.
+
+Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters')
+Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -85,7 +85,7 @@ static struct mlx5_profile profile[] = {
+       [2] = {
+               .mask           = MLX5_PROF_MASK_QP_SIZE |
+                                 MLX5_PROF_MASK_MR_CACHE,
+-              .log_max_qp     = 17,
++              .log_max_qp     = 18,
+               .mr_cache[0]    = {
+                       .size   = 500,
+                       .limit  = 250
diff --git a/queue-4.4/net-mlx5e-count-lro-packets-correctly.patch b/queue-4.4/net-mlx5e-count-lro-packets-correctly.patch
new file mode 100644 (file)
index 0000000..14317f7
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: Gal Pressman <galp@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:19 +0200
+Subject: net/mlx5e: Count LRO packets correctly
+
+From: Gal Pressman <galp@mellanox.com>
+
+
+[ Upstream commit 8ab7e2ae15d84ba758b2c8c6f4075722e9bd2a08 ]
+
+RX packets statistics ('rx_packets' counter) used to count an LRO packet
+as one, even though it contains multiple segments.
+This patch will increment the counter by the number of segments, and
+align the driver with the behavior of other drivers in the stack.
+
+Note that no information is lost in this patch due to 'rx_lro_packets'
+counter existence.
+
+Before, ethtool showed:
+$ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets"
+     rx_packets: 435277
+     rx_lro_packets: 35847
+     rx_packets_phy: 1935066
+
+Now, we will see the more logical statistics:
+$ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets"
+     rx_packets: 1935066
+     rx_lro_packets: 35847
+     rx_packets_phy: 1935066
+
+Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files")
+Signed-off-by: Gal Pressman <galp@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -197,6 +197,10 @@ static inline void mlx5e_build_rx_skb(st
+       if (lro_num_seg > 1) {
+               mlx5e_lro_update_hdr(skb, cqe);
+               skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
++              /* Subtract one since we already counted this as one
++               * "regular" packet in mlx5e_complete_rx_cqe()
++               */
++              rq->stats.packets += lro_num_seg - 1;
+               rq->stats.lro_packets++;
+               rq->stats.lro_bytes += cqe_bcnt;
+       }
diff --git a/queue-4.4/net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch b/queue-4.4/net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch
new file mode 100644 (file)
index 0000000..b348a38
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Wed, 15 Mar 2017 18:10:47 +0200
+Subject: net/openvswitch: Set the ipv6 source tunnel key address attribute correctly
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit 3d20f1f7bd575d147ffa75621fa560eea0aec690 ]
+
+When dealing with ipv6 source tunnel key address attribute
+(OVS_TUNNEL_KEY_ATTR_IPV6_SRC) we are wrongly setting the tunnel
+dst ip, fix that.
+
+Fixes: 6b26ba3a7d95 ('openvswitch: netlink attributes for IPv6 tunneling')
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reported-by: Paul Blakey <paulb@mellanox.com>
+Acked-by: Jiri Benc <jbenc@redhat.com>
+Acked-by: Joe Stringer <joe@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/flow_netlink.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -588,7 +588,7 @@ static int ip_tun_from_nlattr(const stru
+                       ipv4 = true;
+                       break;
+               case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
+-                      SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
++                      SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
+                                       nla_get_in6_addr(a), is_mask);
+                       ipv6 = true;
+                       break;
diff --git a/queue-4.4/net-properly-release-sk_frag.page.patch b/queue-4.4/net-properly-release-sk_frag.page.patch
new file mode 100644 (file)
index 0000000..a251a4c
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 15 Mar 2017 13:21:28 -0700
+Subject: net: properly release sk_frag.page
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 22a0e18eac7a9e986fec76c60fa4a2926d1291e2 ]
+
+I mistakenly added the code to release sk->sk_frag in
+sk_common_release() instead of sk_destruct()
+
+TCP sockets using sk->sk_allocation == GFP_ATOMIC do not call
+sk_common_release() at close time, thus leaking one (order-3) page.
+
+iSCSI is using such sockets.
+
+Fixes: 5640f7685831 ("net: use a per task frag allocator")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1459,6 +1459,11 @@ void sk_destruct(struct sock *sk)
+               pr_debug("%s: optmem leakage (%d bytes) detected\n",
+                        __func__, atomic_read(&sk->sk_omem_alloc));
++      if (sk->sk_frag.page) {
++              put_page(sk->sk_frag.page);
++              sk->sk_frag.page = NULL;
++      }
++
+       if (sk->sk_peer_cred)
+               put_cred(sk->sk_peer_cred);
+       put_pid(sk->sk_peer_pid);
+@@ -2691,11 +2696,6 @@ void sk_common_release(struct sock *sk)
+       sk_refcnt_debug_release(sk);
+-      if (sk->sk_frag.page) {
+-              put_page(sk->sk_frag.page);
+-              sk->sk_frag.page = NULL;
+-      }
+-
+       sock_put(sk);
+ }
+ EXPORT_SYMBOL(sk_common_release);
diff --git a/queue-4.4/net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch b/queue-4.4/net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch
new file mode 100644 (file)
index 0000000..e0b4d5a
--- /dev/null
@@ -0,0 +1,111 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: Andrey Ulanov <andreyu@google.com>
+Date: Tue, 14 Mar 2017 20:16:42 -0700
+Subject: net: unix: properly re-increment inflight counter of GC discarded candidates
+
+From: Andrey Ulanov <andreyu@google.com>
+
+
+[ Upstream commit 7df9c24625b9981779afb8fcdbe2bb4765e61147 ]
+
+Dmitry has reported that a BUG_ON() condition in unix_notinflight()
+may be triggered by simple code that forwards a unix socket in an
+SCM_RIGHTS message.
+That is caused by incorrect unix socket GC implementation in unix_gc().
+
+The GC first collects list of candidates, then (a) decrements their
+"children's" inflight counter, (b) checks which inflight counters are
+now 0, and then (c) increments all inflight counters back.
+(a) and (c) are done by calling scan_children() with inc_inflight or
+dec_inflight as the second argument.
+
+Commit 6209344f5a37 ("net: unix: fix inflight counting bug in garbage
+collector") changed scan_children() such that it no longer considers
+sockets that do not have UNIX_GC_CANDIDATE flag. It also added a block
+of code that unsets this flag _before_ invoking
+scan_children(, dec_inflight, ). This may lead to incorrect inflight
+counters for some sockets.
+
+This change fixes this bug by changing order of operations:
+UNIX_GC_CANDIDATE is now unset only after all inflight counters are
+restored to the original state.
+
+  kernel BUG at net/unix/garbage.c:149!
+  RIP: 0010:[<ffffffff8717ebf4>]  [<ffffffff8717ebf4>]
+  unix_notinflight+0x3b4/0x490 net/unix/garbage.c:149
+  Call Trace:
+   [<ffffffff8716cfbf>] unix_detach_fds.isra.19+0xff/0x170 net/unix/af_unix.c:1487
+   [<ffffffff8716f6a9>] unix_destruct_scm+0xf9/0x210 net/unix/af_unix.c:1496
+   [<ffffffff86a90a01>] skb_release_head_state+0x101/0x200 net/core/skbuff.c:655
+   [<ffffffff86a9808a>] skb_release_all+0x1a/0x60 net/core/skbuff.c:668
+   [<ffffffff86a980ea>] __kfree_skb+0x1a/0x30 net/core/skbuff.c:684
+   [<ffffffff86a98284>] kfree_skb+0x184/0x570 net/core/skbuff.c:705
+   [<ffffffff871789d5>] unix_release_sock+0x5b5/0xbd0 net/unix/af_unix.c:559
+   [<ffffffff87179039>] unix_release+0x49/0x90 net/unix/af_unix.c:836
+   [<ffffffff86a694b2>] sock_release+0x92/0x1f0 net/socket.c:570
+   [<ffffffff86a6962b>] sock_close+0x1b/0x20 net/socket.c:1017
+   [<ffffffff81a76b8e>] __fput+0x34e/0x910 fs/file_table.c:208
+   [<ffffffff81a771da>] ____fput+0x1a/0x20 fs/file_table.c:244
+   [<ffffffff81483ab0>] task_work_run+0x1a0/0x280 kernel/task_work.c:116
+   [<     inline     >] exit_task_work include/linux/task_work.h:21
+   [<ffffffff8141287a>] do_exit+0x183a/0x2640 kernel/exit.c:828
+   [<ffffffff8141383e>] do_group_exit+0x14e/0x420 kernel/exit.c:931
+   [<ffffffff814429d3>] get_signal+0x663/0x1880 kernel/signal.c:2307
+   [<ffffffff81239b45>] do_signal+0xc5/0x2190 arch/x86/kernel/signal.c:807
+   [<ffffffff8100666a>] exit_to_usermode_loop+0x1ea/0x2d0
+  arch/x86/entry/common.c:156
+   [<     inline     >] prepare_exit_to_usermode arch/x86/entry/common.c:190
+   [<ffffffff81009693>] syscall_return_slowpath+0x4d3/0x570
+  arch/x86/entry/common.c:259
+   [<ffffffff881478e6>] entry_SYSCALL_64_fastpath+0xc4/0xc6
+
+Link: https://lkml.org/lkml/2017/3/6/252
+Signed-off-by: Andrey Ulanov <andreyu@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Fixes: 6209344 ("net: unix: fix inflight counting bug in garbage collector")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/garbage.c |   17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct
+       if (s) {
+               struct unix_sock *u = unix_sk(s);
++              BUG_ON(!atomic_long_read(&u->inflight));
+               BUG_ON(list_empty(&u->link));
+               if (atomic_long_dec_and_test(&u->inflight))
+@@ -341,6 +342,14 @@ void unix_gc(void)
+       }
+       list_del(&cursor);
++      /* Now gc_candidates contains only garbage.  Restore original
++       * inflight counters for these as well, and remove the skbuffs
++       * which are creating the cycle(s).
++       */
++      skb_queue_head_init(&hitlist);
++      list_for_each_entry(u, &gc_candidates, link)
++              scan_children(&u->sk, inc_inflight, &hitlist);
++
+       /* not_cycle_list contains those sockets which do not make up a
+        * cycle.  Restore these to the inflight list.
+        */
+@@ -350,14 +359,6 @@ void unix_gc(void)
+               list_move_tail(&u->link, &gc_inflight_list);
+       }
+-      /* Now gc_candidates contains only garbage.  Restore original
+-       * inflight counters for these as well, and remove the skbuffs
+-       * which are creating the cycle(s).
+-       */
+-      skb_queue_head_init(&hitlist);
+-      list_for_each_entry(u, &gc_candidates, link)
+-      scan_children(&u->sk, inc_inflight, &hitlist);
+-
+       spin_unlock(&unix_gc_lock);
+       /* Here we are. Hitlist is filled. Die. */
diff --git a/queue-4.4/series b/queue-4.4/series
new file mode 100644 (file)
index 0000000..57f63b5
--- /dev/null
@@ -0,0 +1,11 @@
+net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch
+net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch
+net-properly-release-sk_frag.page.patch
+amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch
+net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch
+net-mlx5-increase-number-of-max-qps-in-default-profile.patch
+net-mlx5e-count-lro-packets-correctly.patch
+net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch
+ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch
+socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch
+tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch
diff --git a/queue-4.4/socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch b/queue-4.4/socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch
new file mode 100644 (file)
index 0000000..ae4989a
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 22 Mar 2017 13:08:08 +0100
+Subject: socket, bpf: fix sk_filter use after free in sk_clone_lock
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit a97e50cc4cb67e1e7bff56f6b41cda62ca832336 ]
+
+In sk_clone_lock(), we create a new socket and inherit most of the
+parent's members via sock_copy() which memcpy()'s various sections.
+Now, in case the parent socket had a BPF socket filter attached,
+then newsk->sk_filter points to the same instance as the original
+sk->sk_filter.
+
+sk_filter_charge() is then called on the newsk->sk_filter to take a
+reference and should that fail due to hitting max optmem, we bail
+out and release the newsk instance.
+
+The issue is that commit 278571baca2a ("net: filter: simplify socket
+charging") wrongly combined the dismantle path with the failure path
+of xfrm_sk_clone_policy(). This means, even when charging failed, we
+call sk_free_unlock_clone() on the newsk, which then still points to
+the same sk_filter as the original sk.
+
+Thus, sk_free_unlock_clone() calls into __sk_destruct() eventually
+where it tests for present sk_filter and calls sk_filter_uncharge()
+on it, which potentially lets sk_omem_alloc wrap around and releases
+the eBPF prog and sk_filter structure from the (still intact) parent.
+
+Fix it by making sure that when sk_filter_charge() failed, we reset
+newsk->sk_filter back to NULL before passing to sk_free_unlock_clone(),
+so that we don't mess with the parent's sk_filter.
+
+Only if xfrm_sk_clone_policy() fails, we did reach the point where
+either the parent's filter was NULL and as a result newsk's as well
+or where we previously had a successful sk_filter_charge(), thus for
+that case, we do need sk_filter_uncharge() to release the prior taken
+reference on sk_filter.
+
+Fixes: 278571baca2a ("net: filter: simplify socket charging")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1557,6 +1557,12 @@ struct sock *sk_clone_lock(const struct
+                       is_charged = sk_filter_charge(newsk, filter);
+               if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
++                      /* We need to make sure that we don't uncharge the new
++                       * socket if we couldn't charge it in the first place
++                       * as otherwise we uncharge the parent's filter.
++                       */
++                      if (!is_charged)
++                              RCU_INIT_POINTER(newsk->sk_filter, NULL);
+                       /* It is still raw copy of parent, so invalidate
+                        * destructor and make plain sk_free() */
+                       newsk->sk_destruct = NULL;
diff --git a/queue-4.4/tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch b/queue-4.4/tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch
new file mode 100644 (file)
index 0000000..3535c10
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Mon Mar 27 18:22:09 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 22 Mar 2017 08:10:21 -0700
+Subject: tcp: initialize icsk_ack.lrcvtime at session start time
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 15bb7745e94a665caf42bfaabf0ce062845b533b ]
+
+icsk_ack.lrcvtime has a 0 value at socket creation time.
+
+tcpi_last_data_recv can have bogus value if no payload is ever received.
+
+This patch initializes icsk_ack.lrcvtime for active sessions
+in tcp_finish_connect(), and for passive sessions in
+tcp_create_openreq_child()
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c     |    2 +-
+ net/ipv4/tcp_minisocks.c |    1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5435,6 +5435,7 @@ void tcp_finish_connect(struct sock *sk,
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       tcp_set_state(sk, TCP_ESTABLISHED);
++      icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+       if (skb) {
+               icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
+@@ -5647,7 +5648,6 @@ static int tcp_rcv_synsent_state_process
+                        * to stand against the temptation 8)     --ANK
+                        */
+                       inet_csk_schedule_ack(sk);
+-                      icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+                       tcp_enter_quickack_mode(sk);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                 TCP_DELACK_MAX, TCP_RTO_MAX);
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -472,6 +472,7 @@ struct sock *tcp_create_openreq_child(co
+               newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
+               newtp->rtt_min[0].rtt = ~0U;
+               newicsk->icsk_rto = TCP_TIMEOUT_INIT;
++              newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
+               newtp->packets_out = 0;
+               newtp->retrans_out = 0;