4.10-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 27 Mar 2017 16:19:17 +0000 (18:19 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 27 Mar 2017 16:19:17 +0000 (18:19 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 Mar 2017 16:19:17 +0000 (18:19 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 Mar 2017 16:19:17 +0000 (18:19 +0200)
diff --git a/queue-4.10/amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch b/queue-4.10/amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch

new file mode 100644 (file)

index 0000000..2ed78ba
--- /dev/null
+++ b/queue-4.10/amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch
@@ -0,0 +1,284 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
+Date: Wed, 15 Mar 2017 15:11:23 -0500
+Subject: amd-xgbe: Fix jumbo MTU processing on newer hardware
+
+From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
+
+
+[ Upstream commit 622c36f143fc9566ba49d7cec994c2da1182d9e2 ]
+
+Newer hardware does not provide a cumulative payload length when multiple
+descriptors are needed to handle the data. Once the MTU increases beyond
+the size that can be handled by a single descriptor, the SKB does not get
+built properly by the driver.
+
+The driver will now calculate the size of the data buffers used by the
+hardware.  The first buffer of the first descriptor is for packet headers
+or packet headers and data when the headers can't be split. Subsequent
+descriptors in a multi-descriptor chain will not use the first buffer. The
+second buffer is used by all the descriptors in the chain for payload data.
+Based on whether the driver is processing the first, intermediate, or last
+descriptor it can calculate the buffer usage and build the SKB properly.
+
+Tested and verified on both old and new hardware.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-common.h |    6 +
+ drivers/net/ethernet/amd/xgbe/xgbe-dev.c    |   20 +++--
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c    |  102 +++++++++++++++++-----------
+ 3 files changed, 78 insertions(+), 50 deletions(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+@@ -1148,8 +1148,8 @@
+ #define RX_PACKET_ATTRIBUTES_CSUM_DONE_WIDTH  1
+ #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX  1
+ #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH  1
+-#define RX_PACKET_ATTRIBUTES_INCOMPLETE_INDEX 2
+-#define RX_PACKET_ATTRIBUTES_INCOMPLETE_WIDTH 1
++#define RX_PACKET_ATTRIBUTES_LAST_INDEX               2
++#define RX_PACKET_ATTRIBUTES_LAST_WIDTH               1
+ #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_INDEX       3
+ #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_WIDTH       1
+ #define RX_PACKET_ATTRIBUTES_CONTEXT_INDEX    4
+@@ -1158,6 +1158,8 @@
+ #define RX_PACKET_ATTRIBUTES_RX_TSTAMP_WIDTH  1
+ #define RX_PACKET_ATTRIBUTES_RSS_HASH_INDEX   6
+ #define RX_PACKET_ATTRIBUTES_RSS_HASH_WIDTH   1
++#define RX_PACKET_ATTRIBUTES_FIRST_INDEX      7
++#define RX_PACKET_ATTRIBUTES_FIRST_WIDTH      1
+ 
+ #define RX_NORMAL_DESC0_OVT_INDEX             0
+ #define RX_NORMAL_DESC0_OVT_WIDTH             16
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+@@ -1896,10 +1896,15 @@ static int xgbe_dev_read(struct xgbe_cha
+ 
+       /* Get the header length */
+       if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) {
++              XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
++                             FIRST, 1);
+               rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2,
+                                                     RX_NORMAL_DESC2, HL);
+               if (rdata->rx.hdr_len)
+                       pdata->ext_stats.rx_split_header_packets++;
++      } else {
++              XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
++                             FIRST, 0);
+       }
+ 
+       /* Get the RSS hash */
+@@ -1922,19 +1927,16 @@ static int xgbe_dev_read(struct xgbe_cha
+               }
+       }
+ 
+-      /* Get the packet length */
+-      rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
+-
+-      if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) {
+-              /* Not all the data has been transferred for this packet */
+-              XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+-                             INCOMPLETE, 1);
++      /* Not all the data has been transferred for this packet */
++      if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD))
+               return 0;
+-      }
+ 
+       /* This is the last of the data for this packet */
+       XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+-                     INCOMPLETE, 0);
++                     LAST, 1);
++
++      /* Get the packet length */
++      rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
+ 
+       /* Set checksum done indicator as appropriate */
+       if (netdev->features & NETIF_F_RXCSUM)
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -1973,13 +1973,12 @@ static struct sk_buff *xgbe_create_skb(s
+ {
+       struct sk_buff *skb;
+       u8 *packet;
+-      unsigned int copy_len;
+ 
+       skb = napi_alloc_skb(napi, rdata->rx.hdr.dma_len);
+       if (!skb)
+               return NULL;
+ 
+-      /* Start with the header buffer which may contain just the header
++      /* Pull in the header buffer which may contain just the header
+        * or the header plus data
+        */
+       dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.hdr.dma_base,
+@@ -1988,30 +1987,49 @@ static struct sk_buff *xgbe_create_skb(s
+ 
+       packet = page_address(rdata->rx.hdr.pa.pages) +
+                rdata->rx.hdr.pa.pages_offset;
+-      copy_len = (rdata->rx.hdr_len) ? rdata->rx.hdr_len : len;
+-      copy_len = min(rdata->rx.hdr.dma_len, copy_len);
+-      skb_copy_to_linear_data(skb, packet, copy_len);
+-      skb_put(skb, copy_len);
+-
+-      len -= copy_len;
+-      if (len) {
+-              /* Add the remaining data as a frag */
+-              dma_sync_single_range_for_cpu(pdata->dev,
+-                                            rdata->rx.buf.dma_base,
+-                                            rdata->rx.buf.dma_off,
+-                                            rdata->rx.buf.dma_len,
+-                                            DMA_FROM_DEVICE);
+-
+-              skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+-                              rdata->rx.buf.pa.pages,
+-                              rdata->rx.buf.pa.pages_offset,
+-                              len, rdata->rx.buf.dma_len);
+-              rdata->rx.buf.pa.pages = NULL;
+-      }
++      skb_copy_to_linear_data(skb, packet, len);
++      skb_put(skb, len);
+ 
+       return skb;
+ }
+ 
++static unsigned int xgbe_rx_buf1_len(struct xgbe_ring_data *rdata,
++                                   struct xgbe_packet_data *packet)
++{
++      /* Always zero if not the first descriptor */
++      if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST))
++              return 0;
++
++      /* First descriptor with split header, return header length */
++      if (rdata->rx.hdr_len)
++              return rdata->rx.hdr_len;
++
++      /* First descriptor but not the last descriptor and no split header,
++       * so the full buffer was used
++       */
++      if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST))
++              return rdata->rx.hdr.dma_len;
++
++      /* First descriptor and last descriptor and no split header, so
++       * calculate how much of the buffer was used
++       */
++      return min_t(unsigned int, rdata->rx.hdr.dma_len, rdata->rx.len);
++}
++
++static unsigned int xgbe_rx_buf2_len(struct xgbe_ring_data *rdata,
++                                   struct xgbe_packet_data *packet,
++                                   unsigned int len)
++{
++      /* Always the full buffer if not the last descriptor */
++      if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST))
++              return rdata->rx.buf.dma_len;
++
++      /* Last descriptor so calculate how much of the buffer was used
++       * for the last bit of data
++       */
++      return rdata->rx.len - len;
++}
++
+ static int xgbe_tx_poll(struct xgbe_channel *channel)
+ {
+       struct xgbe_prv_data *pdata = channel->pdata;
+@@ -2094,8 +2112,8 @@ static int xgbe_rx_poll(struct xgbe_chan
+       struct napi_struct *napi;
+       struct sk_buff *skb;
+       struct skb_shared_hwtstamps *hwtstamps;
+-      unsigned int incomplete, error, context_next, context;
+-      unsigned int len, rdesc_len, max_len;
++      unsigned int last, error, context_next, context;
++      unsigned int len, buf1_len, buf2_len, max_len;
+       unsigned int received = 0;
+       int packet_count = 0;
+ 
+@@ -2105,7 +2123,7 @@ static int xgbe_rx_poll(struct xgbe_chan
+       if (!ring)
+               return 0;
+ 
+-      incomplete = 0;
++      last = 0;
+       context_next = 0;
+ 
+       napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
+@@ -2139,9 +2157,8 @@ read_again:
+               received++;
+               ring->cur++;
+ 
+-              incomplete = XGMAC_GET_BITS(packet->attributes,
+-                                          RX_PACKET_ATTRIBUTES,
+-                                          INCOMPLETE);
++              last = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
++                                    LAST);
+               context_next = XGMAC_GET_BITS(packet->attributes,
+                                             RX_PACKET_ATTRIBUTES,
+                                             CONTEXT_NEXT);
+@@ -2150,7 +2167,7 @@ read_again:
+                                        CONTEXT);
+ 
+               /* Earlier error, just drain the remaining data */
+-              if ((incomplete || context_next) && error)
++              if ((!last || context_next) && error)
+                       goto read_again;
+ 
+               if (error || packet->errors) {
+@@ -2162,16 +2179,22 @@ read_again:
+               }
+ 
+               if (!context) {
+-                      /* Length is cumulative, get this descriptor's length */
+-                      rdesc_len = rdata->rx.len - len;
+-                      len += rdesc_len;
++                      /* Get the data length in the descriptor buffers */
++                      buf1_len = xgbe_rx_buf1_len(rdata, packet);
++                      len += buf1_len;
++                      buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
++                      len += buf2_len;
+ 
+-                      if (rdesc_len && !skb) {
++                      if (!skb) {
+                               skb = xgbe_create_skb(pdata, napi, rdata,
+-                                                    rdesc_len);
+-                              if (!skb)
++                                                    buf1_len);
++                              if (!skb) {
+                                       error = 1;
+-                      } else if (rdesc_len) {
++                                      goto skip_data;
++                              }
++                      }
++
++                      if (buf2_len) {
+                               dma_sync_single_range_for_cpu(pdata->dev,
+                                                       rdata->rx.buf.dma_base,
+                                                       rdata->rx.buf.dma_off,
+@@ -2181,13 +2204,14 @@ read_again:
+                               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+                                               rdata->rx.buf.pa.pages,
+                                               rdata->rx.buf.pa.pages_offset,
+-                                              rdesc_len,
++                                              buf2_len,
+                                               rdata->rx.buf.dma_len);
+                               rdata->rx.buf.pa.pages = NULL;
+                       }
+               }
+ 
+-              if (incomplete || context_next)
++skip_data:
++              if (!last || context_next)
+                       goto read_again;
+ 
+               if (!skb)
+@@ -2245,7 +2269,7 @@ next_packet:
+       }
+ 
+       /* Check if we need to save state before leaving */
+-      if (received && (incomplete || context_next)) {
++      if (received && (!last || context_next)) {
+               rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
+               rdata->state_saved = 1;
+               rdata->state.skb = skb;
diff --git a/queue-4.10/amd-xgbe-fix-the-ecc-related-bit-position-definitions.patch b/queue-4.10/amd-xgbe-fix-the-ecc-related-bit-position-definitions.patch

new file mode 100644 (file)

index 0000000..1d38b64
--- /dev/null
+++ b/queue-4.10/amd-xgbe-fix-the-ecc-related-bit-position-definitions.patch
@@ -0,0 +1,67 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
+Date: Wed, 22 Mar 2017 17:25:27 -0500
+Subject: amd-xgbe: Fix the ECC-related bit position definitions
+
+From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
+
+
+[ Upstream commit f43feef4e6acde10857fcbfdede790d6b3f2c71d ]
+
+The ECC bit positions that describe whether the ECC interrupt is for
+Tx, Rx or descriptor memory and whether it is a single correctable
+or double detected error were defined incorrectly (reversed order).
+Fix the bit position definitions for these settings so that the proper
+ECC handling is performed.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-common.h |   24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+@@ -984,29 +984,29 @@
+ #define XP_ECC_CNT1_DESC_DED_WIDTH            8
+ #define XP_ECC_CNT1_DESC_SEC_INDEX            0
+ #define XP_ECC_CNT1_DESC_SEC_WIDTH            8
+-#define XP_ECC_IER_DESC_DED_INDEX             0
++#define XP_ECC_IER_DESC_DED_INDEX             5
+ #define XP_ECC_IER_DESC_DED_WIDTH             1
+-#define XP_ECC_IER_DESC_SEC_INDEX             1
++#define XP_ECC_IER_DESC_SEC_INDEX             4
+ #define XP_ECC_IER_DESC_SEC_WIDTH             1
+-#define XP_ECC_IER_RX_DED_INDEX                       2
++#define XP_ECC_IER_RX_DED_INDEX                       3
+ #define XP_ECC_IER_RX_DED_WIDTH                       1
+-#define XP_ECC_IER_RX_SEC_INDEX                       3
++#define XP_ECC_IER_RX_SEC_INDEX                       2
+ #define XP_ECC_IER_RX_SEC_WIDTH                       1
+-#define XP_ECC_IER_TX_DED_INDEX                       4
++#define XP_ECC_IER_TX_DED_INDEX                       1
+ #define XP_ECC_IER_TX_DED_WIDTH                       1
+-#define XP_ECC_IER_TX_SEC_INDEX                       5
++#define XP_ECC_IER_TX_SEC_INDEX                       0
+ #define XP_ECC_IER_TX_SEC_WIDTH                       1
+-#define XP_ECC_ISR_DESC_DED_INDEX             0
++#define XP_ECC_ISR_DESC_DED_INDEX             5
+ #define XP_ECC_ISR_DESC_DED_WIDTH             1
+-#define XP_ECC_ISR_DESC_SEC_INDEX             1
++#define XP_ECC_ISR_DESC_SEC_INDEX             4
+ #define XP_ECC_ISR_DESC_SEC_WIDTH             1
+-#define XP_ECC_ISR_RX_DED_INDEX                       2
++#define XP_ECC_ISR_RX_DED_INDEX                       3
+ #define XP_ECC_ISR_RX_DED_WIDTH                       1
+-#define XP_ECC_ISR_RX_SEC_INDEX                       3
++#define XP_ECC_ISR_RX_SEC_INDEX                       2
+ #define XP_ECC_ISR_RX_SEC_WIDTH                       1
+-#define XP_ECC_ISR_TX_DED_INDEX                       4
++#define XP_ECC_ISR_TX_DED_INDEX                       1
+ #define XP_ECC_ISR_TX_DED_WIDTH                       1
+-#define XP_ECC_ISR_TX_SEC_INDEX                       5
++#define XP_ECC_ISR_TX_SEC_INDEX                       0
+ #define XP_ECC_ISR_TX_SEC_WIDTH                       1
+ #define XP_I2C_MUTEX_BUSY_INDEX                       31
+ #define XP_I2C_MUTEX_BUSY_WIDTH                       1
diff --git a/queue-4.10/genetlink-fix-counting-regression-on-ctrl_dumpfamily.patch b/queue-4.10/genetlink-fix-counting-regression-on-ctrl_dumpfamily.patch

new file mode 100644 (file)

index 0000000..c8202dd
--- /dev/null
+++ b/queue-4.10/genetlink-fix-counting-regression-on-ctrl_dumpfamily.patch
@@ -0,0 +1,58 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Stanislaw Gruszka <sgruszka@redhat.com>
+Date: Wed, 22 Mar 2017 16:08:33 +0100
+Subject: genetlink: fix counting regression on ctrl_dumpfamily()
+
+From: Stanislaw Gruszka <sgruszka@redhat.com>
+
+
+[ Upstream commit 1d2a6a5e4bf2921531071fcff8538623dce74efa ]
+
+Commit 2ae0f17df1cd ("genetlink: use idr to track families") replaced
+
+       if (++n < fams_to_skip)
+               continue;
+into:
+
+       if (n++ < fams_to_skip)
+               continue;
+
+This subtle change means that on a retried ctrl_dumpfamily() call we omit
+the one family that failed ctrl_fill_info() on the previous call, because
+the count stored in cb->args[0] = n also includes the family that failed
+ctrl_fill_info().
+
+To fix the problem and avoid confusion in the future, just decrement the
+n counter when ctrl_fill_info() fails.
+
+The user-visible problem caused by this bug is failure to get access to
+some genetlink family, e.g. nl80211. However, the problem is reproducible
+only if the number of registered genetlink families is big enough to
+cause a second call of ctrl_dumpfamily().
+
+Cc: Xose Vazquez Perez <xose.vazquez@gmail.com>
+Cc: Larry Finger <Larry.Finger@lwfinger.net>
+Cc: Johannes Berg <johannes@sipsolutions.net>
+Fixes: 2ae0f17df1cd ("genetlink: use idr to track families")
+Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
+Acked-by: Johannes Berg <johannes@sipsolutions.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/genetlink.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/netlink/genetlink.c
++++ b/net/netlink/genetlink.c
+@@ -783,8 +783,10 @@ static int ctrl_dumpfamily(struct sk_buf
+ 
+               if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
+                                  cb->nlh->nlmsg_seq, NLM_F_MULTI,
+-                                 skb, CTRL_CMD_NEWFAMILY) < 0)
++                                 skb, CTRL_CMD_NEWFAMILY) < 0) {
++                      n--;
+                       break;
++              }
+       }
+ 
+       cb->args[0] = n;
diff --git a/queue-4.10/ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch b/queue-4.10/ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch

new file mode 100644 (file)

index 0000000..7e11b74
--- /dev/null
+++ b/queue-4.10/ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch
@@ -0,0 +1,39 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 21 Mar 2017 19:22:28 -0700
+Subject: ipv4: provide stronger user input validation in nl_fib_input()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c64c0b3cac4c5b8cb093727d2c19743ea3965c0b ]
+
+Alexander reported a KMSAN splat caused by reads of an uninitialized
+field (tb_id_in) from a user-provided struct fib_result_nl.
+
+It turns out the nl_fib_input() sanity tests on user input are a bit
+wrong:
+
+User can pretend nlh->nlmsg_len is big enough, but provide
+at sendmsg() time a too small buffer.
+
+Reported-by: Alexander Potapenko <glider@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -1082,7 +1082,8 @@ static void nl_fib_input(struct sk_buff
+ 
+       net = sock_net(skb->sk);
+       nlh = nlmsg_hdr(skb);
+-      if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
++      if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
++          skb->len < nlh->nlmsg_len ||
+           nlmsg_len(nlh) < sizeof(*frn))
+               return;
+ 
diff --git a/queue-4.10/ipv6-make-sure-to-initialize-sockc.tsflags-before-first-use.patch b/queue-4.10/ipv6-make-sure-to-initialize-sockc.tsflags-before-first-use.patch

new file mode 100644 (file)

index 0000000..0c736ab
--- /dev/null
+++ b/queue-4.10/ipv6-make-sure-to-initialize-sockc.tsflags-before-first-use.patch
@@ -0,0 +1,43 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Tue, 21 Mar 2017 17:14:27 +0100
+Subject: ipv6: make sure to initialize sockc.tsflags before first use
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit d515684d78148884d5fc425ba904c50f03844020 ]
+
+In the case udp_sk(sk)->pending is AF_INET6, udpv6_sendmsg() would
+jump to do_append_data, skipping the initialization of sockc.tsflags.
+Fix the problem by moving sockc.tsflags initialization earlier.
+
+The bug was detected with KMSAN.
+
+Fixes: c14ac9451c34 ("sock: enable timestamping using control messages")
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/udp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1022,6 +1022,7 @@ int udpv6_sendmsg(struct sock *sk, struc
+       ipc6.hlimit = -1;
+       ipc6.tclass = -1;
+       ipc6.dontfrag = -1;
++      sockc.tsflags = sk->sk_tsflags;
+ 
+       /* destination address check */
+       if (sin6) {
+@@ -1146,7 +1147,6 @@ do_udp_sendmsg:
+ 
+       fl6.flowi6_mark = sk->sk_mark;
+       fl6.flowi6_uid = sk->sk_uid;
+-      sockc.tsflags = sk->sk_tsflags;
+ 
+       if (msg->msg_controllen) {
+               opt = &opt_space;
diff --git a/queue-4.10/net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch b/queue-4.10/net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch

new file mode 100644 (file)

index 0000000..98ccd8c
--- /dev/null
+++ b/queue-4.10/net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch
@@ -0,0 +1,43 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Wed, 15 Mar 2017 12:57:21 -0700
+Subject: net: bcmgenet: Do not suspend PHY if Wake-on-LAN is enabled
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 5371bbf4b295eea334ed453efa286afa2c3ccff3 ]
+
+Suspending the PHY would be putting it in a low power state where it
+may no longer allow us to do Wake-on-LAN.
+
+Fixes: cc013fb48898 ("net: bcmgenet: correctly suspend and resume PHY device")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -3395,7 +3395,8 @@ static int bcmgenet_suspend(struct devic
+ 
+       bcmgenet_netif_stop(dev);
+ 
+-      phy_suspend(priv->phydev);
++      if (!device_may_wakeup(d))
++              phy_suspend(priv->phydev);
+ 
+       netif_device_detach(dev);
+ 
+@@ -3492,7 +3493,8 @@ static int bcmgenet_resume(struct device
+ 
+       netif_device_attach(dev);
+ 
+-      phy_resume(priv->phydev);
++      if (!device_may_wakeup(d))
++              phy_resume(priv->phydev);
+ 
+       if (priv->eee.eee_enabled)
+               bcmgenet_eee_enable_set(dev, true);
diff --git a/queue-4.10/net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch b/queue-4.10/net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch

new file mode 100644 (file)

index 0000000..9a24b73
--- /dev/null
+++ b/queue-4.10/net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch
@@ -0,0 +1,85 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Doug Berger <opendmb@gmail.com>
+Date: Tue, 21 Mar 2017 14:01:06 -0700
+Subject: net: bcmgenet: remove bcmgenet_internal_phy_setup()
+
+From: Doug Berger <opendmb@gmail.com>
+
+
+[ Upstream commit 31739eae738ccbe8b9d627c3f2251017ca03f4d2 ]
+
+Commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset")
+removed the bcmgenet_mii_reset() function from bcmgenet_power_up() and
+bcmgenet_internal_phy_setup() functions.  In so doing it broke the reset
+of the internal PHY devices used by the GENETv1-GENETv3 which required
+this reset before the UniMAC was enabled.  It also broke the internal
+GPHY devices used by the GENETv4 because the config_init that installed
+the AFE workaround was no longer occurring after the reset of the GPHY
+performed by bcmgenet_phy_power_set() in bcmgenet_internal_phy_setup().
+In addition the code in bcmgenet_internal_phy_setup() related to the
+"enable APD" comment goes with the bcmgenet_mii_reset() so it should
+have also been removed.
+
+Commit bd4060a6108b ("net: bcmgenet: Power on integrated GPHY in
+bcmgenet_power_up()") moved the bcmgenet_phy_power_set() call to the
+bcmgenet_power_up() function, but failed to remove it from the
+bcmgenet_internal_phy_setup() function.  Had it done so, the
+bcmgenet_internal_phy_setup() function would have been empty and could
+have been removed at that time.
+
+Commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on")
+was submitted to correct the functional problems introduced by
+commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset"). It
+was included in v4.4 and made available on 4.3-stable. Unfortunately,
+it didn't fully revert the commit because this bcmgenet_mii_reset()
+doesn't apply the soft reset to the internal GPHY used by GENETv4 like
+the previous one did. This prevents the restoration of the AFE work-
+arounds for internal GPHY devices after the bcmgenet_phy_power_set() in
+bcmgenet_internal_phy_setup().
+
+This commit takes the alternate approach of removing the unnecessary
+bcmgenet_internal_phy_setup() function which shouldn't have been in v4.3
+so that when bcmgenet_mii_reset() was restored it should have only gone
+into bcmgenet_power_up().  This will avoid the problems while also
+removing the redundancy (and hopefully some of the confusion).
+
+Fixes: 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmmii.c |   15 ---------------
+ 1 file changed, 15 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
+@@ -220,20 +220,6 @@ void bcmgenet_phy_power_set(struct net_d
+       udelay(60);
+ }
+ 
+-static void bcmgenet_internal_phy_setup(struct net_device *dev)
+-{
+-      struct bcmgenet_priv *priv = netdev_priv(dev);
+-      u32 reg;
+-
+-      /* Power up PHY */
+-      bcmgenet_phy_power_set(dev, true);
+-      /* enable APD */
+-      reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
+-      reg |= EXT_PWR_DN_EN_LD;
+-      bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
+-      bcmgenet_mii_reset(dev);
+-}
+-
+ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
+ {
+       u32 reg;
+@@ -281,7 +267,6 @@ int bcmgenet_mii_config(struct net_devic
+ 
+               if (priv->internal_phy) {
+                       phy_name = "internal PHY";
+-                      bcmgenet_internal_phy_setup(dev);
+               } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
+                       phy_name = "MoCA";
+                       bcmgenet_moca_phy_setup(priv);
diff --git a/queue-4.10/net-mlx5-add-missing-entries-for-set-query-rate-limit-commands.patch b/queue-4.10/net-mlx5-add-missing-entries-for-set-query-rate-limit-commands.patch

new file mode 100644 (file)

index 0000000..cc0ebc8
--- /dev/null
+++ b/queue-4.10/net-mlx5-add-missing-entries-for-set-query-rate-limit-commands.patch
@@ -0,0 +1,44 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:12 +0200
+Subject: net/mlx5: Add missing entries for set/query rate limit commands
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit 1f30a86c58093046dc3e49c23d2618894e098f7a ]
+
+The switch cases for the rate limit set and query commands were
+missing, which could lead to wrong behavior under a fw error or driver
+reset flow. Fix that.
+
+Fixes: 1466cc5b23d1 ('net/mlx5: Rate limit tables support')
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -361,6 +361,8 @@ static int mlx5_internal_err_ret_value(s
+       case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+       case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+       case MLX5_CMD_OP_QUERY_Q_COUNTER:
++      case MLX5_CMD_OP_SET_RATE_LIMIT:
++      case MLX5_CMD_OP_QUERY_RATE_LIMIT:
+       case MLX5_CMD_OP_ALLOC_PD:
+       case MLX5_CMD_OP_ALLOC_UAR:
+       case MLX5_CMD_OP_CONFIG_INT_MODERATION:
+@@ -497,6 +499,8 @@ const char *mlx5_command_str(int command
+       MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
+       MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
+       MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
++      MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
++      MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
+       MLX5_COMMAND_STR_CASE(ALLOC_PD);
+       MLX5_COMMAND_STR_CASE(DEALLOC_PD);
+       MLX5_COMMAND_STR_CASE(ALLOC_UAR);
diff --git a/queue-4.10/net-mlx5-e-switch-don-t-allow-changing-inline-mode-when-flows-are-configured.patch b/queue-4.10/net-mlx5-e-switch-don-t-allow-changing-inline-mode-when-flows-are-configured.patch

new file mode 100644 (file)

index 0000000..432cc13
--- /dev/null
+++ b/queue-4.10/net-mlx5-e-switch-don-t-allow-changing-inline-mode-when-flows-are-configured.patch
@@ -0,0 +1,69 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Roi Dayan <roid@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:14 +0200
+Subject: net/mlx5: E-Switch, Don't allow changing inline mode when flows are configured
+
+From: Roi Dayan <roid@mellanox.com>
+
+
+[ Upstream commit 375f51e2b5b7b9a42b3139aea519cbb1bfc5d6ef ]
+
+Changing the eswitch inline mode can potentially cause already configured
+flows not to match the policy. E.g. set policy L4, add some L4 rules,
+set policy to L2 --> bad! Hence we disallow it.
+
+Keep track of how many offloaded rules are now set and refuse
+inline mode changes if this isn't zero.
+
+Fixes: bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode")
+Signed-off-by: Roi Dayan <roid@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h          |    1 +
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c |    8 ++++++++
+ 2 files changed, 9 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -201,6 +201,7 @@ struct mlx5_esw_offload {
+       struct mlx5_eswitch_rep *vport_reps;
+       DECLARE_HASHTABLE(encap_tbl, 8);
+       u8 inline_mode;
++      u64 num_flows;
+ };
+ 
+ struct mlx5_eswitch {
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -93,6 +93,8 @@ mlx5_eswitch_add_offloaded_rule(struct m
+                                  spec, &flow_act, dest, i);
+       if (IS_ERR(rule))
+               mlx5_fc_destroy(esw->dev, counter);
++      else
++              esw->offloads.num_flows++;
+ 
+       return rule;
+ }
+@@ -108,6 +110,7 @@ mlx5_eswitch_del_offloaded_rule(struct m
+               counter = mlx5_flow_rule_counter(rule);
+               mlx5_del_flow_rules(rule);
+               mlx5_fc_destroy(esw->dev, counter);
++              esw->offloads.num_flows--;
+       }
+ }
+ 
+@@ -919,6 +922,11 @@ int mlx5_devlink_eswitch_inline_mode_set
+           MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+               return -EOPNOTSUPP;
+ 
++      if (esw->offloads.num_flows > 0) {
++              esw_warn(dev, "Can't set inline mode when flows are configured\n");
++              return -EOPNOTSUPP;
++      }
++
+       err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
+       if (err)
+               goto out;
diff --git a/queue-4.10/net-mlx5-increase-number-of-max-qps-in-default-profile.patch b/queue-4.10/net-mlx5-increase-number-of-max-qps-in-default-profile.patch

new file mode 100644 (file)

index 0000000..1aaf360
--- /dev/null
+++ b/queue-4.10/net-mlx5-increase-number-of-max-qps-in-default-profile.patch
@@ -0,0 +1,34 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Maor Gottlieb <maorg@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:17 +0200
+Subject: net/mlx5: Increase number of max QPs in default profile
+
+From: Maor Gottlieb <maorg@mellanox.com>
+
+
+[ Upstream commit 5f40b4ed975c26016cf41953b7510fe90718e21c ]
+
+With ConnectX-4 sharing SRQs from the same space as QPs, we hit a
+limit that prevents some applications from allocating the QPs they need.
+Double the size to 256K.
+
+Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters')
+Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -87,7 +87,7 @@ static struct mlx5_profile profile[] = {
+       [2] = {
+               .mask           = MLX5_PROF_MASK_QP_SIZE |
+                                 MLX5_PROF_MASK_MR_CACHE,
+-              .log_max_qp     = 17,
++              .log_max_qp     = 18,
+               .mr_cache[0]    = {
+                       .size   = 500,
+                       .limit  = 250
diff --git a/queue-4.10/net-mlx5e-avoid-supporting-udp-tunnel-port-ndo-for-vf-reps.patch b/queue-4.10/net-mlx5e-avoid-supporting-udp-tunnel-port-ndo-for-vf-reps.patch

new file mode 100644 (file)

index 0000000..904d773
--- /dev/null
+++ b/queue-4.10/net-mlx5e-avoid-supporting-udp-tunnel-port-ndo-for-vf-reps.patch
@@ -0,0 +1,127 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Paul Blakey <paulb@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:16 +0200
+Subject: net/mlx5e: Avoid supporting udp tunnel port ndo for VF reps
+
+From: Paul Blakey <paulb@mellanox.com>
+
+
+[ Upstream commit 1ad9a00ae0efc2e9337148d6c382fad3d27bf99a ]
+
+This was added to allow the TC offloading code to identify offloading
+encap/decap vxlan rules.
+
+The VF reps are effectively related to the same mlx5 PCI device as the
+PF. Since the kernel invokes the (say) delete ndo for each netdev, the
+FW erred on multiple vxlan dst port deletes when the port was deleted
+from the system.
+
+We fix that by keeping the registration to be carried out only by the
+PF. Since the PF serves as the uplink device, the VF reps will look
+up a port there and realize if they are ok to offload that.
+
+Tested:
+ <SETUP VFS>
+ <SETUP switchdev mode to have representors>
+ ip link add vxlan1 type vxlan id 44 dev ens5f0 dstport 9999
+ ip link set vxlan1 up
+ ip link del dev vxlan1
+
+Fixes: 4a25730eb202 ('net/mlx5e: Add ndo_udp_tunnel_add to VF representors')
+Signed-off-by: Paul Blakey <paulb@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h      |    4 ----
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c |    8 ++++----
+ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  |    2 --
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   |    9 +++++++--
+ 4 files changed, 11 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -921,10 +921,6 @@ void mlx5e_destroy_netdev(struct mlx5_co
+ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
+ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
+ u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
+-void mlx5e_add_vxlan_port(struct net_device *netdev,
+-                        struct udp_tunnel_info *ti);
+-void mlx5e_del_vxlan_port(struct net_device *netdev,
+-                        struct udp_tunnel_info *ti);
+ 
+ int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+                           void *sp);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -3055,8 +3055,8 @@ static int mlx5e_get_vf_stats(struct net
+                                           vf_stats);
+ }
+ 
+-void mlx5e_add_vxlan_port(struct net_device *netdev,
+-                        struct udp_tunnel_info *ti)
++static void mlx5e_add_vxlan_port(struct net_device *netdev,
++                               struct udp_tunnel_info *ti)
+ {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+ 
+@@ -3069,8 +3069,8 @@ void mlx5e_add_vxlan_port(struct net_dev
+       mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1);
+ }
+ 
+-void mlx5e_del_vxlan_port(struct net_device *netdev,
+-                        struct udp_tunnel_info *ti)
++static void mlx5e_del_vxlan_port(struct net_device *netdev,
++                               struct udp_tunnel_info *ti)
+ {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+ 
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+@@ -394,8 +394,6 @@ static const struct net_device_ops mlx5e
+       .ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
+       .ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
+       .ndo_get_stats64         = mlx5e_rep_get_stats,
+-      .ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
+-      .ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
+       .ndo_has_offload_stats   = mlx5e_has_offload_stats,
+       .ndo_get_offload_stats   = mlx5e_get_offload_stats,
+ };
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -264,12 +264,15 @@ static int parse_tunnel_attr(struct mlx5
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_PORTS,
+                                                 f->mask);
++              struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
++              struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
++              struct mlx5e_priv *up_priv = netdev_priv(up_dev);
+ 
+               /* Full udp dst port must be given */
+               if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
+                       goto vxlan_match_offload_err;
+ 
+-              if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
++              if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) &&
+                   MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
+                       parse_vxlan_attr(spec, f);
+               else {
+@@ -827,6 +830,8 @@ static int mlx5e_attach_encap(struct mlx
+                             struct mlx5_esw_flow_attr *attr)
+ {
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
++      struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
++      struct mlx5e_priv *up_priv = netdev_priv(up_dev);
+       unsigned short family = ip_tunnel_info_af(tun_info);
+       struct ip_tunnel_key *key = &tun_info->key;
+       struct mlx5_encap_info info;
+@@ -849,7 +854,7 @@ vxlan_encap_offload_err:
+               return -EOPNOTSUPP;
+       }
+ 
+-      if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
++      if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) &&
+           MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
+               info.tp_dst = key->tp_dst;
+               info.tun_id = tunnel_id_to_key32(key->tun_id);
diff --git a/queue-4.10/net-mlx5e-change-the-tc-offload-rule-add-del-code-path-to-be-per-nic-or-e-switch.patch b/queue-4.10/net-mlx5e-change-the-tc-offload-rule-add-del-code-path-to-be-per-nic-or-e-switch.patch

new file mode 100644 (file)

index 0000000..4bf31b1
--- /dev/null
+++ b/queue-4.10/net-mlx5e-change-the-tc-offload-rule-add-del-code-path-to-be-per-nic-or-e-switch.patch
@@ -0,0 +1,144 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:13 +0200
+Subject: net/mlx5e: Change the TC offload rule add/del code path to be per NIC or E-Switch
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit d85cdccbb3fe9a632ec9d0f4e4526c8c84fc3523 ]
+
+Refactor the code to deal with add/del TC rules to have handler per NIC/E-switch
+offloading use case, and push the latter into the e-switch code. This provides
+better separation and is to be used in down-stream patch for applying a fix.
+
+Fixes: bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode")
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reviewed-by: Roi Dayan <roid@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c            |   57 ++++++++-----
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h          |    5 +
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c |   14 +++
+ 3 files changed, 58 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -128,6 +128,23 @@ err_create_ft:
+       return rule;
+ }
+ 
++static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
++                                struct mlx5e_tc_flow *flow)
++{
++      struct mlx5_fc *counter = NULL;
++
++      if (!IS_ERR(flow->rule)) {
++              counter = mlx5_flow_rule_counter(flow->rule);
++              mlx5_del_flow_rules(flow->rule);
++              mlx5_fc_destroy(priv->mdev, counter);
++      }
++
++      if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
++              mlx5_destroy_flow_table(priv->fs.tc.t);
++              priv->fs.tc.t = NULL;
++      }
++}
++
+ static struct mlx5_flow_handle *
+ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+                     struct mlx5_flow_spec *spec,
+@@ -144,7 +161,24 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv
+ }
+ 
+ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+-                             struct mlx5e_tc_flow *flow) {
++                             struct mlx5e_tc_flow *flow);
++
++static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
++                                struct mlx5e_tc_flow *flow)
++{
++      struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
++
++      mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->attr);
++
++      mlx5_eswitch_del_vlan_action(esw, flow->attr);
++
++      if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
++              mlx5e_detach_encap(priv, flow);
++}
++
++static void mlx5e_detach_encap(struct mlx5e_priv *priv,
++                             struct mlx5e_tc_flow *flow)
++{
+       struct list_head *next = flow->encap.next;
+ 
+       list_del(&flow->encap);
+@@ -169,24 +203,11 @@ static void mlx5e_tc_del_flow(struct mlx
+                             struct mlx5e_tc_flow *flow)
+ {
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+-      struct mlx5_fc *counter = NULL;
+ 
+-      if (!IS_ERR(flow->rule)) {
+-              counter = mlx5_flow_rule_counter(flow->rule);
+-              mlx5_del_flow_rules(flow->rule);
+-              mlx5_fc_destroy(priv->mdev, counter);
+-      }
+-
+-      if (esw && esw->mode == SRIOV_OFFLOADS) {
+-              mlx5_eswitch_del_vlan_action(esw, flow->attr);
+-              if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+-                      mlx5e_detach_encap(priv, flow);
+-      }
+-
+-      if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
+-              mlx5_destroy_flow_table(priv->fs.tc.t);
+-              priv->fs.tc.t = NULL;
+-      }
++      if (esw && esw->mode == SRIOV_OFFLOADS)
++              mlx5e_tc_del_fdb_flow(priv, flow);
++      else
++              mlx5e_tc_del_nic_flow(priv, flow);
+ }
+ 
+ static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -263,6 +263,11 @@ struct mlx5_flow_handle *
+ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+                               struct mlx5_flow_spec *spec,
+                               struct mlx5_esw_flow_attr *attr);
++void
++mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
++                              struct mlx5_flow_handle *rule,
++                              struct mlx5_esw_flow_attr *attr);
++
+ struct mlx5_flow_handle *
+ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
+ 
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -97,6 +97,20 @@ mlx5_eswitch_add_offloaded_rule(struct m
+       return rule;
+ }
+ 
++void
++mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
++                              struct mlx5_flow_handle *rule,
++                              struct mlx5_esw_flow_attr *attr)
++{
++      struct mlx5_fc *counter = NULL;
++
++      if (!IS_ERR(rule)) {
++              counter = mlx5_flow_rule_counter(rule);
++              mlx5_del_flow_rules(rule);
++              mlx5_fc_destroy(esw->dev, counter);
++      }
++}
++
+ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
+ {
+       struct mlx5_eswitch_rep *rep;
diff --git a/queue-4.10/net-mlx5e-count-gso-packets-correctly.patch b/queue-4.10/net-mlx5e-count-gso-packets-correctly.patch

new file mode 100644 (file)

index 0000000..6755244
--- /dev/null
+++ b/queue-4.10/net-mlx5e-count-gso-packets-correctly.patch
@@ -0,0 +1,71 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Gal Pressman <galp@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:18 +0200
+Subject: net/mlx5e: Count GSO packets correctly
+
+From: Gal Pressman <galp@mellanox.com>
+
+
+[ Upstream commit d3a4e4da54c7adb420d5f48e89be913b14bdeff1 ]
+
+TX packet statistics ('tx_packets' counter) used to count each GSO packet
+as one, even though it contains multiple segments.
+This patch will increment the counter by the number of segments, and
+align the driver with the behavior of other drivers in the stack.
+
+Note that no information is lost in this patch due to 'tx_tso_packets'
+counter existence.
+
+Before, ethtool showed:
+$ ethtool -S ens6 | egrep "tx_packets|tx_tso_packets"
+     tx_packets: 61340
+     tx_tso_packets: 60954
+     tx_packets_phy: 2451115
+
+Now, we will see the more logical statistics:
+$ ethtool -S ens6 | egrep "tx_packets|tx_tso_packets"
+     tx_packets: 2451115
+     tx_tso_packets: 60954
+     tx_packets_phy: 2451115
+
+Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files")
+Signed-off-by: Gal Pressman <galp@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+@@ -272,15 +272,18 @@ static netdev_tx_t mlx5e_sq_xmit(struct
+                       sq->stats.tso_bytes += skb->len - ihs;
+               }
+ 
++              sq->stats.packets += skb_shinfo(skb)->gso_segs;
+               num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
+       } else {
+               bf = sq->bf_budget &&
+                    !skb->xmit_more &&
+                    !skb_shinfo(skb)->nr_frags;
+               ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
++              sq->stats.packets++;
+               num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
+       }
+ 
++      sq->stats.bytes += num_bytes;
+       wi->num_bytes = num_bytes;
+ 
+       if (skb_vlan_tag_present(skb)) {
+@@ -377,8 +380,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct
+       if (bf)
+               sq->bf_budget--;
+ 
+-      sq->stats.packets++;
+-      sq->stats.bytes += num_bytes;
+       return NETDEV_TX_OK;
+ 
+ dma_unmap_wqe_err:
diff --git a/queue-4.10/net-mlx5e-count-lro-packets-correctly.patch b/queue-4.10/net-mlx5e-count-lro-packets-correctly.patch

new file mode 100644 (file)

index 0000000..f50566b
--- /dev/null
+++ b/queue-4.10/net-mlx5e-count-lro-packets-correctly.patch
@@ -0,0 +1,54 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Gal Pressman <galp@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:19 +0200
+Subject: net/mlx5e: Count LRO packets correctly
+
+From: Gal Pressman <galp@mellanox.com>
+
+
+[ Upstream commit 8ab7e2ae15d84ba758b2c8c6f4075722e9bd2a08 ]
+
+RX packet statistics ('rx_packets' counter) used to count each LRO packet
+as one, even though it contains multiple segments.
+This patch will increment the counter by the number of segments, and
+align the driver with the behavior of other drivers in the stack.
+
+Note that no information is lost in this patch due to 'rx_lro_packets'
+counter existence.
+
+Before, ethtool showed:
+$ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets"
+     rx_packets: 435277
+     rx_lro_packets: 35847
+     rx_packets_phy: 1935066
+
+Now, we will see the more logical statistics:
+$ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets"
+     rx_packets: 1935066
+     rx_lro_packets: 35847
+     rx_packets_phy: 1935066
+
+Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files")
+Signed-off-by: Gal Pressman <galp@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -603,6 +603,10 @@ static inline void mlx5e_build_rx_skb(st
+       if (lro_num_seg > 1) {
+               mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
+               skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
++              /* Subtract one since we already counted this as one
++               * "regular" packet in mlx5e_complete_rx_cqe()
++               */
++              rq->stats.packets += lro_num_seg - 1;
+               rq->stats.lro_packets++;
+               rq->stats.lro_bytes += cqe_bcnt;
+       }
diff --git a/queue-4.10/net-mlx5e-use-the-proper-uapi-values-when-offloading-tc-vlan-actions.patch b/queue-4.10/net-mlx5e-use-the-proper-uapi-values-when-offloading-tc-vlan-actions.patch

new file mode 100644 (file)

index 0000000..2daa3c3
--- /dev/null
+++ b/queue-4.10/net-mlx5e-use-the-proper-uapi-values-when-offloading-tc-vlan-actions.patch
@@ -0,0 +1,45 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:15 +0200
+Subject: net/mlx5e: Use the proper UAPI values when offloading TC vlan actions
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit 09c91ddf2cd33489c2c14edfef43ae38d412888e ]
+
+Currently we use the non-UAPI values and fail to return an error for
+the modify action, which is not supported; fix that.
+
+Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads')
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reported-by: Petr Machata <petrm@mellanox.com>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -974,14 +974,16 @@ static int parse_tc_fdb_actions(struct m
+               }
+ 
+               if (is_tcf_vlan(a)) {
+-                      if (tcf_vlan_action(a) == VLAN_F_POP) {
++                      if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
+                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+-                      } else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
++                      } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
+                               if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
+                                       return -EOPNOTSUPP;
+ 
+                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+                               attr->vlan = tcf_vlan_push_vid(a);
++                      } else { /* action is TCA_VLAN_ACT_MODIFY */
++                              return -EOPNOTSUPP;
+                       }
+                       continue;
+               }
diff --git a/queue-4.10/net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch b/queue-4.10/net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch

new file mode 100644 (file)

index 0000000..e78d823
--- /dev/null
+++ b/queue-4.10/net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch
@@ -0,0 +1,36 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Wed, 15 Mar 2017 18:10:47 +0200
+Subject: net/openvswitch: Set the ipv6 source tunnel key address attribute correctly
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit 3d20f1f7bd575d147ffa75621fa560eea0aec690 ]
+
+When dealing with ipv6 source tunnel key address attribute
+(OVS_TUNNEL_KEY_ATTR_IPV6_SRC) we are wrongly setting the tunnel
+dst ip, fix that.
+
+Fixes: 6b26ba3a7d95 ('openvswitch: netlink attributes for IPv6 tunneling')
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reported-by: Paul Blakey <paulb@mellanox.com>
+Acked-by: Jiri Benc <jbenc@redhat.com>
+Acked-by: Joe Stringer <joe@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/flow_netlink.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -588,7 +588,7 @@ static int ip_tun_from_nlattr(const stru
+                       ipv4 = true;
+                       break;
+               case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
+-                      SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
++                      SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
+                                       nla_get_in6_addr(a), is_mask);
+                       ipv6 = true;
+                       break;
diff --git a/queue-4.10/net-properly-release-sk_frag.page.patch b/queue-4.10/net-properly-release-sk_frag.page.patch

new file mode 100644 (file)

index 0000000..473ad1e
--- /dev/null
+++ b/queue-4.10/net-properly-release-sk_frag.page.patch
@@ -0,0 +1,52 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 15 Mar 2017 13:21:28 -0700
+Subject: net: properly release sk_frag.page
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 22a0e18eac7a9e986fec76c60fa4a2926d1291e2 ]
+
+I mistakenly added the code to release sk->sk_frag in
+sk_common_release() instead of sk_destruct()
+
+TCP sockets using sk->sk_allocation == GFP_ATOMIC do not call
+sk_common_release() at close time, thus leaking one (order-3) page.
+
+iSCSI is using such sockets.
+
+Fixes: 5640f7685831 ("net: use a per task frag allocator")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1444,6 +1444,11 @@ static void __sk_destruct(struct rcu_hea
+               pr_debug("%s: optmem leakage (%d bytes) detected\n",
+                        __func__, atomic_read(&sk->sk_omem_alloc));
+ 
++      if (sk->sk_frag.page) {
++              put_page(sk->sk_frag.page);
++              sk->sk_frag.page = NULL;
++      }
++
+       if (sk->sk_peer_cred)
+               put_cred(sk->sk_peer_cred);
+       put_pid(sk->sk_peer_pid);
+@@ -2774,11 +2779,6 @@ void sk_common_release(struct sock *sk)
+ 
+       sk_refcnt_debug_release(sk);
+ 
+-      if (sk->sk_frag.page) {
+-              put_page(sk->sk_frag.page);
+-              sk->sk_frag.page = NULL;
+-      }
+-
+       sock_put(sk);
+ }
+ EXPORT_SYMBOL(sk_common_release);
diff --git a/queue-4.10/net-solve-a-napi-race.patch b/queue-4.10/net-solve-a-napi-race.patch

new file mode 100644 (file)

index 0000000..f5c615a
--- /dev/null
+++ b/queue-4.10/net-solve-a-napi-race.patch
@@ -0,0 +1,255 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 16 Mar 2017 19:02:33 -0700
+Subject: net: solve a NAPI race
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+commit 39e6c8208d7b6fb9d2047850fb3327db567b564b upstream.
+
+While playing with mlx4 hardware timestamping of RX packets, I found
+that some packets were received by TCP stack with a ~200 ms delay...
+
+Since the timestamp was provided by the NIC, and my probe was added
+in tcp_v4_rcv() while in BH handler, I was confident it was not
+a sender issue, or a drop in the network.
+
+This would happen with a very low probability, but hurting RPC
+workloads.
+
+A NAPI driver normally arms the IRQ after the napi_complete_done(),
+after NAPI_STATE_SCHED is cleared, so that the hard irq handler can grab
+it.
+
+The problem is that if another point in the stack grabs the NAPI_STATE_SCHED
+bit while IRQs are not disabled, we might later have an IRQ firing and
+finding this bit set, right before napi_complete_done() clears it.
+
+This can happen with busy polling users, or if gro_flush_timeout is
+used. But some other uses of napi_schedule() in drivers can cause this
+as well.
+
+thread 1                                 thread 2 (could be on same cpu, or not)
+
+// busy polling or napi_watchdog()
+napi_schedule();
+...
+napi->poll()
+
+device polling:
+read 2 packets from ring buffer
+                                          Additional 3rd packet is
+                                          available.
+                                          device hard irq
+
+                                          // does nothing because
+                                          // NAPI_STATE_SCHED bit is owned by thread 1
+                                          napi_schedule();
+
+napi_complete_done(napi, 2);
+rearm_irq();
+
+Note that rearm_irq() will not force the device to send an additional
+IRQ for the packet it already signaled (3rd packet in my example)
+
+This patch adds a new NAPI_STATE_MISSED bit, that napi_schedule_prep()
+can set if it could not grab NAPI_STATE_SCHED
+
+Then napi_complete_done() properly reschedules the napi to make sure
+we do not miss something.
+
+Since we manipulate multiple bits at once, use cmpxchg() like in
+sk_busy_loop() to provide proper transactions.
+
+In v2, I changed napi_watchdog() to use a relaxed variant of
+napi_schedule_prep() : No need to set NAPI_STATE_MISSED from this point.
+
+In v3, I added more details in the changelog and cleared
+NAPI_STATE_MISSED in busy_poll_stop()
+
+In v4, I added the ideas given by Alexander Duyck in v3 review
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h |   29 +++++-----------
+ net/core/dev.c            |   81 ++++++++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 83 insertions(+), 27 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -330,6 +330,7 @@ struct napi_struct {
+ 
+ enum {
+       NAPI_STATE_SCHED,       /* Poll is scheduled */
++      NAPI_STATE_MISSED,      /* reschedule a napi */
+       NAPI_STATE_DISABLE,     /* Disable pending */
+       NAPI_STATE_NPSVC,       /* Netpoll - don't dequeue from poll_list */
+       NAPI_STATE_HASHED,      /* In NAPI hash (busy polling possible) */
+@@ -338,12 +339,13 @@ enum {
+ };
+ 
+ enum {
+-      NAPIF_STATE_SCHED        = (1UL << NAPI_STATE_SCHED),
+-      NAPIF_STATE_DISABLE      = (1UL << NAPI_STATE_DISABLE),
+-      NAPIF_STATE_NPSVC        = (1UL << NAPI_STATE_NPSVC),
+-      NAPIF_STATE_HASHED       = (1UL << NAPI_STATE_HASHED),
+-      NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL),
+-      NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL),
++      NAPIF_STATE_SCHED        = BIT(NAPI_STATE_SCHED),
++      NAPIF_STATE_MISSED       = BIT(NAPI_STATE_MISSED),
++      NAPIF_STATE_DISABLE      = BIT(NAPI_STATE_DISABLE),
++      NAPIF_STATE_NPSVC        = BIT(NAPI_STATE_NPSVC),
++      NAPIF_STATE_HASHED       = BIT(NAPI_STATE_HASHED),
++      NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
++      NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+ };
+ 
+ enum gro_result {
+@@ -413,20 +415,7 @@ static inline bool napi_disable_pending(
+       return test_bit(NAPI_STATE_DISABLE, &n->state);
+ }
+ 
+-/**
+- *    napi_schedule_prep - check if NAPI can be scheduled
+- *    @n: NAPI context
+- *
+- * Test if NAPI routine is already running, and if not mark
+- * it as running.  This is used as a condition variable to
+- * insure only one NAPI poll instance runs.  We also make
+- * sure there is no pending NAPI disable.
+- */
+-static inline bool napi_schedule_prep(struct napi_struct *n)
+-{
+-      return !napi_disable_pending(n) &&
+-              !test_and_set_bit(NAPI_STATE_SCHED, &n->state);
+-}
++bool napi_schedule_prep(struct napi_struct *n);
+ 
+ /**
+  *    napi_schedule - schedule NAPI poll
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4913,6 +4913,39 @@ void __napi_schedule(struct napi_struct
+ EXPORT_SYMBOL(__napi_schedule);
+ 
+ /**
++ *    napi_schedule_prep - check if napi can be scheduled
++ *    @n: napi context
++ *
++ * Test if NAPI routine is already running, and if not mark
++ * it as running.  This is used as a condition variable to
++ * insure only one NAPI poll instance runs.  We also make
++ * sure there is no pending NAPI disable.
++ */
++bool napi_schedule_prep(struct napi_struct *n)
++{
++      unsigned long val, new;
++
++      do {
++              val = READ_ONCE(n->state);
++              if (unlikely(val & NAPIF_STATE_DISABLE))
++                      return false;
++              new = val | NAPIF_STATE_SCHED;
++
++              /* Sets STATE_MISSED bit if STATE_SCHED was already set
++               * This was suggested by Alexander Duyck, as compiler
++               * emits better code than :
++               * if (val & NAPIF_STATE_SCHED)
++               *     new |= NAPIF_STATE_MISSED;
++               */
++              new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
++                                                 NAPIF_STATE_MISSED;
++      } while (cmpxchg(&n->state, val, new) != val);
++
++      return !(val & NAPIF_STATE_SCHED);
++}
++EXPORT_SYMBOL(napi_schedule_prep);
++
++/**
+  * __napi_schedule_irqoff - schedule for receive
+  * @n: entry to schedule
+  *
+@@ -4943,7 +4976,7 @@ EXPORT_SYMBOL(__napi_complete);
+ 
+ bool napi_complete_done(struct napi_struct *n, int work_done)
+ {
+-      unsigned long flags;
++      unsigned long flags, val, new;
+ 
+       /*
+        * 1) Don't let napi dequeue from the cpu poll list
+@@ -4967,14 +5000,33 @@ bool napi_complete_done(struct napi_stru
+               else
+                       napi_gro_flush(n, false);
+       }
+-      if (likely(list_empty(&n->poll_list))) {
+-              WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+-      } else {
++      if (unlikely(!list_empty(&n->poll_list))) {
+               /* If n->poll_list is not empty, we need to mask irqs */
+               local_irq_save(flags);
+-              __napi_complete(n);
++              list_del_init(&n->poll_list);
+               local_irq_restore(flags);
+       }
++
++      do {
++              val = READ_ONCE(n->state);
++
++              WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
++
++              new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
++
++              /* If STATE_MISSED was set, leave STATE_SCHED set,
++               * because we will call napi->poll() one more time.
++               * This C code was suggested by Alexander Duyck to help gcc.
++               */
++              new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
++                                                  NAPIF_STATE_SCHED;
++      } while (cmpxchg(&n->state, val, new) != val);
++
++      if (unlikely(val & NAPIF_STATE_MISSED)) {
++              __napi_schedule(n);
++              return false;
++      }
++
+       return true;
+ }
+ EXPORT_SYMBOL(napi_complete_done);
+@@ -5000,6 +5052,16 @@ static void busy_poll_stop(struct napi_s
+ {
+       int rc;
+ 
++      /* Busy polling means there is a high chance device driver hard irq
++       * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
++       * set in napi_schedule_prep().
++       * Since we are about to call napi->poll() once more, we can safely
++       * clear NAPI_STATE_MISSED.
++       *
++       * Note: x86 could use a single "lock and ..." instruction
++       * to perform these two clear_bit()
++       */
++      clear_bit(NAPI_STATE_MISSED, &napi->state);
+       clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
+ 
+       local_bh_disable();
+@@ -5146,8 +5208,13 @@ static enum hrtimer_restart napi_watchdo
+       struct napi_struct *napi;
+ 
+       napi = container_of(timer, struct napi_struct, timer);
+-      if (napi->gro_list)
+-              napi_schedule(napi);
++
++      /* Note : we use a relaxed variant of napi_schedule_prep() not setting
++       * NAPI_STATE_MISSED, since we do not react to a device IRQ.
++       */
++      if (napi->gro_list && !napi_disable_pending(napi) &&
++          !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
++              __napi_schedule_irqoff(napi);
+ 
+       return HRTIMER_NORESTART;
+ }
diff --git a/queue-4.10/net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch b/queue-4.10/net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch

new file mode 100644 (file)

index 0000000..ccc3820
--- /dev/null
+++ b/queue-4.10/net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch
@@ -0,0 +1,111 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Andrey Ulanov <andreyu@google.com>
+Date: Tue, 14 Mar 2017 20:16:42 -0700
+Subject: net: unix: properly re-increment inflight counter of GC discarded candidates
+
+From: Andrey Ulanov <andreyu@google.com>
+
+
+[ Upstream commit 7df9c24625b9981779afb8fcdbe2bb4765e61147 ]
+
+Dmitry has reported that a BUG_ON() condition in unix_notinflight()
+may be triggered by simple code that forwards a unix socket in an
+SCM_RIGHTS message.
+That is caused by incorrect unix socket GC implementation in unix_gc().
+
+The GC first collects list of candidates, then (a) decrements their
+"children's" inflight counter, (b) checks which inflight counters are
+now 0, and then (c) increments all inflight counters back.
+(a) and (c) are done by calling scan_children() with inc_inflight or
+dec_inflight as the second argument.
+
+Commit 6209344f5a37 ("net: unix: fix inflight counting bug in garbage
+collector") changed scan_children() such that it no longer considers
+sockets that do not have UNIX_GC_CANDIDATE flag. It also added a block
+of code that unsets this flag _before_ invoking
+scan_children(, dec_inflight, ). This may lead to incorrect inflight
+counters for some sockets.
+
+This change fixes this bug by changing order of operations:
+UNIX_GC_CANDIDATE is now unset only after all inflight counters are
+restored to the original state.
+
+  kernel BUG at net/unix/garbage.c:149!
+  RIP: 0010:[<ffffffff8717ebf4>]  [<ffffffff8717ebf4>]
+  unix_notinflight+0x3b4/0x490 net/unix/garbage.c:149
+  Call Trace:
+   [<ffffffff8716cfbf>] unix_detach_fds.isra.19+0xff/0x170 net/unix/af_unix.c:1487
+   [<ffffffff8716f6a9>] unix_destruct_scm+0xf9/0x210 net/unix/af_unix.c:1496
+   [<ffffffff86a90a01>] skb_release_head_state+0x101/0x200 net/core/skbuff.c:655
+   [<ffffffff86a9808a>] skb_release_all+0x1a/0x60 net/core/skbuff.c:668
+   [<ffffffff86a980ea>] __kfree_skb+0x1a/0x30 net/core/skbuff.c:684
+   [<ffffffff86a98284>] kfree_skb+0x184/0x570 net/core/skbuff.c:705
+   [<ffffffff871789d5>] unix_release_sock+0x5b5/0xbd0 net/unix/af_unix.c:559
+   [<ffffffff87179039>] unix_release+0x49/0x90 net/unix/af_unix.c:836
+   [<ffffffff86a694b2>] sock_release+0x92/0x1f0 net/socket.c:570
+   [<ffffffff86a6962b>] sock_close+0x1b/0x20 net/socket.c:1017
+   [<ffffffff81a76b8e>] __fput+0x34e/0x910 fs/file_table.c:208
+   [<ffffffff81a771da>] ____fput+0x1a/0x20 fs/file_table.c:244
+   [<ffffffff81483ab0>] task_work_run+0x1a0/0x280 kernel/task_work.c:116
+   [<     inline     >] exit_task_work include/linux/task_work.h:21
+   [<ffffffff8141287a>] do_exit+0x183a/0x2640 kernel/exit.c:828
+   [<ffffffff8141383e>] do_group_exit+0x14e/0x420 kernel/exit.c:931
+   [<ffffffff814429d3>] get_signal+0x663/0x1880 kernel/signal.c:2307
+   [<ffffffff81239b45>] do_signal+0xc5/0x2190 arch/x86/kernel/signal.c:807
+   [<ffffffff8100666a>] exit_to_usermode_loop+0x1ea/0x2d0
+  arch/x86/entry/common.c:156
+   [<     inline     >] prepare_exit_to_usermode arch/x86/entry/common.c:190
+   [<ffffffff81009693>] syscall_return_slowpath+0x4d3/0x570
+  arch/x86/entry/common.c:259
+   [<ffffffff881478e6>] entry_SYSCALL_64_fastpath+0xc4/0xc6
+
+Link: https://lkml.org/lkml/2017/3/6/252
+Signed-off-by: Andrey Ulanov <andreyu@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Fixes: 6209344 ("net: unix: fix inflight counting bug in garbage collector")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/garbage.c |   17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct
+       if (s) {
+               struct unix_sock *u = unix_sk(s);
+ 
++              BUG_ON(!atomic_long_read(&u->inflight));
+               BUG_ON(list_empty(&u->link));
+ 
+               if (atomic_long_dec_and_test(&u->inflight))
+@@ -341,6 +342,14 @@ void unix_gc(void)
+       }
+       list_del(&cursor);
+ 
++      /* Now gc_candidates contains only garbage.  Restore original
++       * inflight counters for these as well, and remove the skbuffs
++       * which are creating the cycle(s).
++       */
++      skb_queue_head_init(&hitlist);
++      list_for_each_entry(u, &gc_candidates, link)
++              scan_children(&u->sk, inc_inflight, &hitlist);
++
+       /* not_cycle_list contains those sockets which do not make up a
+        * cycle.  Restore these to the inflight list.
+        */
+@@ -350,14 +359,6 @@ void unix_gc(void)
+               list_move_tail(&u->link, &gc_inflight_list);
+       }
+ 
+-      /* Now gc_candidates contains only garbage.  Restore original
+-       * inflight counters for these as well, and remove the skbuffs
+-       * which are creating the cycle(s).
+-       */
+-      skb_queue_head_init(&hitlist);
+-      list_for_each_entry(u, &gc_candidates, link)
+-      scan_children(&u->sk, inc_inflight, &hitlist);
+-
+       spin_unlock(&unix_gc_lock);
+ 
+       /* Here we are. Hitlist is filled. Die. */
diff --git a/queue-4.10/net-vrf-reset-rt6i_idev-in-local-dst-after-put.patch b/queue-4.10/net-vrf-reset-rt6i_idev-in-local-dst-after-put.patch

new file mode 100644 (file)

index 0000000..662aee0
--- /dev/null
+++ b/queue-4.10/net-vrf-reset-rt6i_idev-in-local-dst-after-put.patch
@@ -0,0 +1,41 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Fri, 17 Mar 2017 16:07:11 -0700
+Subject: net: vrf: Reset rt6i_idev in local dst after put
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit 3dc857f0e8fc22610a59cbb346ba62c6e921863f ]
+
+The VRF driver takes a reference to the inet6_dev on the VRF device for
+its rt6_local dst when handling local traffic through the VRF device as
+a loopback. When the device is deleted the driver does a put on the idev
+but does not reset rt6i_idev in the rt6_info struct. When the dst is
+destroyed, dst_destroy calls ip6_dst_destroy which does a second put for
+what is essentially the same reference causing it to be prematurely freed.
+Reset rt6i_idev after the put in the vrf driver.
+
+Fixes: b4869aa2f881e ("net: vrf: ipv6 support for local traffic to
+                       local addresses")
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vrf.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -462,8 +462,10 @@ static void vrf_rt6_release(struct net_d
+       }
+ 
+       if (rt6_local) {
+-              if (rt6_local->rt6i_idev)
++              if (rt6_local->rt6i_idev) {
+                       in6_dev_put(rt6_local->rt6i_idev);
++                      rt6_local->rt6i_idev = NULL;
++              }
+ 
+               dst = &rt6_local->dst;
+               dev_put(dst->dev);
diff --git a/queue-4.10/openvswitch-add-missing-case-ovs_tunnel_key_attr_pad.patch b/queue-4.10/openvswitch-add-missing-case-ovs_tunnel_key_attr_pad.patch

new file mode 100644 (file)

index 0000000..59e1941
--- /dev/null
+++ b/queue-4.10/openvswitch-add-missing-case-ovs_tunnel_key_attr_pad.patch
@@ -0,0 +1,34 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Kris Murphy <kriskend@linux.vnet.ibm.com>
+Date: Thu, 16 Mar 2017 10:51:28 -0500
+Subject: openvswitch: Add missing case OVS_TUNNEL_KEY_ATTR_PAD
+
+From: Kris Murphy <kriskend@linux.vnet.ibm.com>
+
+
+[ Upstream commit 8f3dbfd79ed9ef9770305a7cc4e13dfd31ad2cd0 ]
+
+Added a case for OVS_TUNNEL_KEY_ATTR_PAD to the switch statement
+in ip_tun_from_nlattr in order to prevent the default case
+returning an error.
+
+Fixes: b46f6ded906e ("libnl: nla_put_be64(): align on a 64-bit area")
+Signed-off-by: Kris Murphy <kriskend@linux.vnet.ibm.com>
+Acked-by: Joe Stringer <joe@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/flow_netlink.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -649,6 +649,8 @@ static int ip_tun_from_nlattr(const stru
+                       tun_flags |= TUNNEL_VXLAN_OPT;
+                       opts_type = type;
+                       break;
++              case OVS_TUNNEL_KEY_ATTR_PAD:
++                      break;
+               default:
+                       OVS_NLERR(log, "Unknown IP tunnel attribute %d",
+                                 type);
diff --git a/queue-4.10/qmi_wwan-add-dell-dw5811e.patch b/queue-4.10/qmi_wwan-add-dell-dw5811e.patch

new file mode 100644 (file)

index 0000000..d1946ed
--- /dev/null
+++ b/queue-4.10/qmi_wwan-add-dell-dw5811e.patch
@@ -0,0 +1,32 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Bjørn Mork <bjorn@mork.no>
+Date: Fri, 17 Mar 2017 17:20:48 +0100
+Subject: qmi_wwan: add Dell DW5811e
+
+From: Bjørn Mork <bjorn@mork.no>
+
+
+[ Upstream commit 6bd845d1cf98b45c634baacb8381436dad3c2dd0 ]
+
+This is a Dell branded Sierra Wireless EM7455. It is operating in
+MBIM mode by default, but can be configured to provide two QMI/RMNET
+functions.
+
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -924,6 +924,8 @@ static const struct usb_device_id produc
+       {QMI_FIXED_INTF(0x413c, 0x81a9, 8)},    /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
+       {QMI_FIXED_INTF(0x413c, 0x81b1, 8)},    /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
+       {QMI_FIXED_INTF(0x413c, 0x81b3, 8)},    /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
++      {QMI_FIXED_INTF(0x413c, 0x81b6, 8)},    /* Dell Wireless 5811e */
++      {QMI_FIXED_INTF(0x413c, 0x81b6, 10)},   /* Dell Wireless 5811e */
+       {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},    /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
+       {QMI_FIXED_INTF(0x22de, 0x9061, 3)},    /* WeTelecom WPD-600N */
+       {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)},    /* SIMCom 7230E */
diff --git a/queue-4.10/series b/queue-4.10/series

new file mode 100644 (file)

index 0000000..d1fb48b
--- /dev/null
+++ b/queue-4.10/series
@@ -0,0 +1,24 @@
+net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch
+net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch
+net-properly-release-sk_frag.page.patch
+amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch
+openvswitch-add-missing-case-ovs_tunnel_key_attr_pad.patch
+net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch
+qmi_wwan-add-dell-dw5811e.patch
+net-vrf-reset-rt6i_idev-in-local-dst-after-put.patch
+net-mlx5-add-missing-entries-for-set-query-rate-limit-commands.patch
+net-mlx5e-change-the-tc-offload-rule-add-del-code-path-to-be-per-nic-or-e-switch.patch
+net-mlx5-e-switch-don-t-allow-changing-inline-mode-when-flows-are-configured.patch
+net-mlx5e-use-the-proper-uapi-values-when-offloading-tc-vlan-actions.patch
+net-mlx5e-avoid-supporting-udp-tunnel-port-ndo-for-vf-reps.patch
+net-mlx5-increase-number-of-max-qps-in-default-profile.patch
+net-mlx5e-count-gso-packets-correctly.patch
+net-mlx5e-count-lro-packets-correctly.patch
+ipv6-make-sure-to-initialize-sockc.tsflags-before-first-use.patch
+net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch
+ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch
+socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch
+genetlink-fix-counting-regression-on-ctrl_dumpfamily.patch
+tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch
+amd-xgbe-fix-the-ecc-related-bit-position-definitions.patch
+net-solve-a-napi-race.patch
diff --git a/queue-4.10/socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch b/queue-4.10/socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch

new file mode 100644 (file)

index 0000000..46ddb92
--- /dev/null
+++ b/queue-4.10/socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch
@@ -0,0 +1,65 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 22 Mar 2017 13:08:08 +0100
+Subject: socket, bpf: fix sk_filter use after free in sk_clone_lock
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit a97e50cc4cb67e1e7bff56f6b41cda62ca832336 ]
+
+In sk_clone_lock(), we create a new socket and inherit most of the
+parent's members via sock_copy() which memcpy()'s various sections.
+Now, in case the parent socket had a BPF socket filter attached,
+then newsk->sk_filter points to the same instance as the original
+sk->sk_filter.
+
+sk_filter_charge() is then called on the newsk->sk_filter to take a
+reference and should that fail due to hitting max optmem, we bail
+out and release the newsk instance.
+
+The issue is that commit 278571baca2a ("net: filter: simplify socket
+charging") wrongly combined the dismantle path with the failure path
+of xfrm_sk_clone_policy(). This means, even when charging failed, we
+call sk_free_unlock_clone() on the newsk, which then still points to
+the same sk_filter as the original sk.
+
+Thus, sk_free_unlock_clone() calls into __sk_destruct() eventually
+where it tests for present sk_filter and calls sk_filter_uncharge()
+on it, which potentially lets sk_omem_alloc wrap around and releases
+the eBPF prog and sk_filter structure from the (still intact) parent.
+
+Fix it by making sure that when sk_filter_charge() failed, we reset
+newsk->sk_filter back to NULL before passing to sk_free_unlock_clone(),
+so that we don't mess with the parent's sk_filter.
+
+Only if xfrm_sk_clone_policy() fails, we did reach the point where
+either the parent's filter was NULL and as a result newsk's as well
+or where we previously had a successful sk_filter_charge(), thus for
+that case, we do need sk_filter_uncharge() to release the prior taken
+reference on sk_filter.
+
+Fixes: 278571baca2a ("net: filter: simplify socket charging")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1545,6 +1545,12 @@ struct sock *sk_clone_lock(const struct
+                       is_charged = sk_filter_charge(newsk, filter);
+ 
+               if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
++                      /* We need to make sure that we don't uncharge the new
++                       * socket if we couldn't charge it in the first place
++                       * as otherwise we uncharge the parent's filter.
++                       */
++                      if (!is_charged)
++                              RCU_INIT_POINTER(newsk->sk_filter, NULL);
+                       /* It is still raw copy of parent, so invalidate
+                        * destructor and make plain sk_free() */
+                       newsk->sk_destruct = NULL;
diff --git a/queue-4.10/tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch b/queue-4.10/tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch

new file mode 100644 (file)

index 0000000..d4b7029
--- /dev/null
+++ b/queue-4.10/tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch
@@ -0,0 +1,55 @@
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 22 Mar 2017 08:10:21 -0700
+Subject: tcp: initialize icsk_ack.lrcvtime at session start time
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 15bb7745e94a665caf42bfaabf0ce062845b533b ]
+
+icsk_ack.lrcvtime has a 0 value at socket creation time.
+
+tcpi_last_data_recv can have bogus value if no payload is ever received.
+
+This patch initializes icsk_ack.lrcvtime for active sessions
+in tcp_finish_connect(), and for passive sessions in
+tcp_create_openreq_child()
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c     |    2 +-
+ net/ipv4/tcp_minisocks.c |    1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5571,6 +5571,7 @@ void tcp_finish_connect(struct sock *sk,
+       struct inet_connection_sock *icsk = inet_csk(sk);
+ 
+       tcp_set_state(sk, TCP_ESTABLISHED);
++      icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+ 
+       if (skb) {
+               icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
+@@ -5789,7 +5790,6 @@ static int tcp_rcv_synsent_state_process
+                        * to stand against the temptation 8)     --ANK
+                        */
+                       inet_csk_schedule_ack(sk);
+-                      icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+                       tcp_enter_quickack_mode(sk);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                 TCP_DELACK_MAX, TCP_RTO_MAX);
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -466,6 +466,7 @@ struct sock *tcp_create_openreq_child(co
+               newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
+               minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
+               newicsk->icsk_rto = TCP_TIMEOUT_INIT;
++              newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
+ 
+               newtp->packets_out = 0;
+               newtp->retrans_out = 0;
diff --git a/queue-4.9/series b/queue-4.9/series

new file mode 100644 (file)

index 0000000..38d832c
--- /dev/null
+++ b/queue-4.9/series
@@ -0,0 +1,18 @@
+net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch
+net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch
+net-properly-release-sk_frag.page.patch
+amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch
+openvswitch-add-missing-case-ovs_tunnel_key_attr_pad.patch
+net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch
+qmi_wwan-add-dell-dw5811e.patch
+net-vrf-reset-rt6i_idev-in-local-dst-after-put.patch
+net-mlx5-add-missing-entries-for-set-query-rate-limit-commands.patch
+net-mlx5e-use-the-proper-uapi-values-when-offloading-tc-vlan-actions.patch
+net-mlx5-increase-number-of-max-qps-in-default-profile.patch
+net-mlx5e-count-gso-packets-correctly.patch
+net-mlx5e-count-lro-packets-correctly.patch
+ipv6-make-sure-to-initialize-sockc.tsflags-before-first-use.patch
+net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch
+ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch
+socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch
+tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 27 Mar 2017 16:19:17 +0000 (18:19 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 27 Mar 2017 16:19:17 +0000 (18:19 +0200)
queue-4.10/amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/amd-xgbe-fix-the-ecc-related-bit-position-definitions.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/genetlink-fix-counting-regression-on-ctrl_dumpfamily.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/ipv6-make-sure-to-initialize-sockc.tsflags-before-first-use.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-mlx5-add-missing-entries-for-set-query-rate-limit-commands.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-mlx5-e-switch-don-t-allow-changing-inline-mode-when-flows-are-configured.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-mlx5-increase-number-of-max-qps-in-default-profile.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-mlx5e-avoid-supporting-udp-tunnel-port-ndo-for-vf-reps.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-mlx5e-change-the-tc-offload-rule-add-del-code-path-to-be-per-nic-or-e-switch.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-mlx5e-count-gso-packets-correctly.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-mlx5e-count-lro-packets-correctly.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-mlx5e-use-the-proper-uapi-values-when-offloading-tc-vlan-actions.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-properly-release-sk_frag.page.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-solve-a-napi-race.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/net-vrf-reset-rt6i_idev-in-local-dst-after-put.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/openvswitch-add-missing-case-ovs_tunnel_key_attr_pad.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/qmi_wwan-add-dell-dw5811e.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/series	[new file with mode: 0644]	patch \| blob
queue-4.10/socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch	[new file with mode: 0644]	patch \| blob
queue-4.10/tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/series	[new file with mode: 0644]	patch \| blob