--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
+Date: Wed, 15 Mar 2017 15:11:23 -0500
+Subject: amd-xgbe: Fix jumbo MTU processing on newer hardware
+
+From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
+
+
+[ Upstream commit 622c36f143fc9566ba49d7cec994c2da1182d9e2 ]
+
+Newer hardware does not provide a cumulative payload length when multiple
+descriptors are needed to handle the data. Once the MTU increases beyond
+the size that can be handled by a single descriptor, the SKB does not get
+built properly by the driver.
+
+The driver will now calculate the size of the data buffers used by the
+hardware. The first buffer of the first descriptor is for packet headers
+or packet headers and data when the headers can't be split. Subsequent
+descriptors in a multi-descriptor chain will not use the first buffer. The
+second buffer is used by all the descriptors in the chain for payload data.
+Based on whether the driver is processing the first, intermediate, or last
+descriptor it can calculate the buffer usage and build the SKB properly.
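+
+The accounting can be sketched in plain C; this is only an
+illustrative model, with hypothetical buffer sizes and flags standing
+in for the driver's rdata/packet state:
+
+    #include <stdio.h>
+
+    /* Buffer 1 (header buffer): used only by the first descriptor. */
+    static unsigned int buf1_len(int first, int last, unsigned int hdr_len,
+                                 unsigned int hdr_buf, unsigned int pkt_len)
+    {
+            if (!first)
+                    return 0;              /* later descriptors skip it */
+            if (hdr_len)
+                    return hdr_len;        /* split header: headers only */
+            if (!last)
+                    return hdr_buf;        /* whole first buffer used */
+            return pkt_len < hdr_buf ? pkt_len : hdr_buf;
+    }
+
+    /* Buffer 2 (data buffer): used by every descriptor in the chain. */
+    static unsigned int buf2_len(int last, unsigned int data_buf,
+                                 unsigned int pkt_len, unsigned int so_far)
+    {
+            if (!last)
+                    return data_buf;       /* intermediate: full buffer */
+            return pkt_len - so_far;       /* last: remaining payload */
+    }
+
+    int main(void)
+    {
+            /* 9000-byte packet over three descriptors, 256B/4KB buffers */
+            unsigned int len = 0, pkt = 9000, i;
+
+            for (i = 0; i < 3; i++) {
+                    int first = (i == 0), last = (i == 2);
+
+                    len += buf1_len(first, last, 0, 256, pkt);
+                    len += buf2_len(last, 4096, pkt, len);
+            }
+            printf("reassembled %u of %u\n", len, pkt);
+            return 0;
+    }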
+
+Tested and verified on both old and new hardware.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-common.h | 6 +
+ drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 20 +++--
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 102 +++++++++++++++++-----------
+ 3 files changed, 78 insertions(+), 50 deletions(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+@@ -1148,8 +1148,8 @@
+ #define RX_PACKET_ATTRIBUTES_CSUM_DONE_WIDTH 1
+ #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX 1
+ #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH 1
+-#define RX_PACKET_ATTRIBUTES_INCOMPLETE_INDEX 2
+-#define RX_PACKET_ATTRIBUTES_INCOMPLETE_WIDTH 1
++#define RX_PACKET_ATTRIBUTES_LAST_INDEX 2
++#define RX_PACKET_ATTRIBUTES_LAST_WIDTH 1
+ #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_INDEX 3
+ #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_WIDTH 1
+ #define RX_PACKET_ATTRIBUTES_CONTEXT_INDEX 4
+@@ -1158,6 +1158,8 @@
+ #define RX_PACKET_ATTRIBUTES_RX_TSTAMP_WIDTH 1
+ #define RX_PACKET_ATTRIBUTES_RSS_HASH_INDEX 6
+ #define RX_PACKET_ATTRIBUTES_RSS_HASH_WIDTH 1
++#define RX_PACKET_ATTRIBUTES_FIRST_INDEX 7
++#define RX_PACKET_ATTRIBUTES_FIRST_WIDTH 1
+
+ #define RX_NORMAL_DESC0_OVT_INDEX 0
+ #define RX_NORMAL_DESC0_OVT_WIDTH 16
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+@@ -1896,10 +1896,15 @@ static int xgbe_dev_read(struct xgbe_cha
+
+ /* Get the header length */
+ if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) {
++ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
++ FIRST, 1);
+ rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2,
+ RX_NORMAL_DESC2, HL);
+ if (rdata->rx.hdr_len)
+ pdata->ext_stats.rx_split_header_packets++;
++ } else {
++ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
++ FIRST, 0);
+ }
+
+ /* Get the RSS hash */
+@@ -1922,19 +1927,16 @@ static int xgbe_dev_read(struct xgbe_cha
+ }
+ }
+
+- /* Get the packet length */
+- rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
+-
+- if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) {
+- /* Not all the data has been transferred for this packet */
+- XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+- INCOMPLETE, 1);
++ /* Not all the data has been transferred for this packet */
++ if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD))
+ return 0;
+- }
+
+ /* This is the last of the data for this packet */
+ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+- INCOMPLETE, 0);
++ LAST, 1);
++
++ /* Get the packet length */
++ rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
+
+ /* Set checksum done indicator as appropriate */
+ if (netdev->features & NETIF_F_RXCSUM)
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -1973,13 +1973,12 @@ static struct sk_buff *xgbe_create_skb(s
+ {
+ struct sk_buff *skb;
+ u8 *packet;
+- unsigned int copy_len;
+
+ skb = napi_alloc_skb(napi, rdata->rx.hdr.dma_len);
+ if (!skb)
+ return NULL;
+
+- /* Start with the header buffer which may contain just the header
++ /* Pull in the header buffer which may contain just the header
+ * or the header plus data
+ */
+ dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.hdr.dma_base,
+@@ -1988,30 +1987,49 @@ static struct sk_buff *xgbe_create_skb(s
+
+ packet = page_address(rdata->rx.hdr.pa.pages) +
+ rdata->rx.hdr.pa.pages_offset;
+- copy_len = (rdata->rx.hdr_len) ? rdata->rx.hdr_len : len;
+- copy_len = min(rdata->rx.hdr.dma_len, copy_len);
+- skb_copy_to_linear_data(skb, packet, copy_len);
+- skb_put(skb, copy_len);
+-
+- len -= copy_len;
+- if (len) {
+- /* Add the remaining data as a frag */
+- dma_sync_single_range_for_cpu(pdata->dev,
+- rdata->rx.buf.dma_base,
+- rdata->rx.buf.dma_off,
+- rdata->rx.buf.dma_len,
+- DMA_FROM_DEVICE);
+-
+- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+- rdata->rx.buf.pa.pages,
+- rdata->rx.buf.pa.pages_offset,
+- len, rdata->rx.buf.dma_len);
+- rdata->rx.buf.pa.pages = NULL;
+- }
++ skb_copy_to_linear_data(skb, packet, len);
++ skb_put(skb, len);
+
+ return skb;
+ }
+
++static unsigned int xgbe_rx_buf1_len(struct xgbe_ring_data *rdata,
++ struct xgbe_packet_data *packet)
++{
++ /* Always zero if not the first descriptor */
++ if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST))
++ return 0;
++
++ /* First descriptor with split header, return header length */
++ if (rdata->rx.hdr_len)
++ return rdata->rx.hdr_len;
++
++ /* First descriptor but not the last descriptor and no split header,
++ * so the full buffer was used
++ */
++ if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST))
++ return rdata->rx.hdr.dma_len;
++
++ /* First descriptor and last descriptor and no split header, so
++ * calculate how much of the buffer was used
++ */
++ return min_t(unsigned int, rdata->rx.hdr.dma_len, rdata->rx.len);
++}
++
++static unsigned int xgbe_rx_buf2_len(struct xgbe_ring_data *rdata,
++ struct xgbe_packet_data *packet,
++ unsigned int len)
++{
++ /* Always the full buffer if not the last descriptor */
++ if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST))
++ return rdata->rx.buf.dma_len;
++
++ /* Last descriptor so calculate how much of the buffer was used
++ * for the last bit of data
++ */
++ return rdata->rx.len - len;
++}
++
+ static int xgbe_tx_poll(struct xgbe_channel *channel)
+ {
+ struct xgbe_prv_data *pdata = channel->pdata;
+@@ -2094,8 +2112,8 @@ static int xgbe_rx_poll(struct xgbe_chan
+ struct napi_struct *napi;
+ struct sk_buff *skb;
+ struct skb_shared_hwtstamps *hwtstamps;
+- unsigned int incomplete, error, context_next, context;
+- unsigned int len, rdesc_len, max_len;
++ unsigned int last, error, context_next, context;
++ unsigned int len, buf1_len, buf2_len, max_len;
+ unsigned int received = 0;
+ int packet_count = 0;
+
+@@ -2105,7 +2123,7 @@ static int xgbe_rx_poll(struct xgbe_chan
+ if (!ring)
+ return 0;
+
+- incomplete = 0;
++ last = 0;
+ context_next = 0;
+
+ napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
+@@ -2139,9 +2157,8 @@ read_again:
+ received++;
+ ring->cur++;
+
+- incomplete = XGMAC_GET_BITS(packet->attributes,
+- RX_PACKET_ATTRIBUTES,
+- INCOMPLETE);
++ last = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
++ LAST);
+ context_next = XGMAC_GET_BITS(packet->attributes,
+ RX_PACKET_ATTRIBUTES,
+ CONTEXT_NEXT);
+@@ -2150,7 +2167,7 @@ read_again:
+ CONTEXT);
+
+ /* Earlier error, just drain the remaining data */
+- if ((incomplete || context_next) && error)
++ if ((!last || context_next) && error)
+ goto read_again;
+
+ if (error || packet->errors) {
+@@ -2162,16 +2179,22 @@ read_again:
+ }
+
+ if (!context) {
+- /* Length is cumulative, get this descriptor's length */
+- rdesc_len = rdata->rx.len - len;
+- len += rdesc_len;
++ /* Get the data length in the descriptor buffers */
++ buf1_len = xgbe_rx_buf1_len(rdata, packet);
++ len += buf1_len;
++ buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
++ len += buf2_len;
+
+- if (rdesc_len && !skb) {
++ if (!skb) {
+ skb = xgbe_create_skb(pdata, napi, rdata,
+- rdesc_len);
+- if (!skb)
++ buf1_len);
++ if (!skb) {
+ error = 1;
+- } else if (rdesc_len) {
++ goto skip_data;
++ }
++ }
++
++ if (buf2_len) {
+ dma_sync_single_range_for_cpu(pdata->dev,
+ rdata->rx.buf.dma_base,
+ rdata->rx.buf.dma_off,
+@@ -2181,13 +2204,14 @@ read_again:
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ rdata->rx.buf.pa.pages,
+ rdata->rx.buf.pa.pages_offset,
+- rdesc_len,
++ buf2_len,
+ rdata->rx.buf.dma_len);
+ rdata->rx.buf.pa.pages = NULL;
+ }
+ }
+
+- if (incomplete || context_next)
++skip_data:
++ if (!last || context_next)
+ goto read_again;
+
+ if (!skb)
+@@ -2245,7 +2269,7 @@ next_packet:
+ }
+
+ /* Check if we need to save state before leaving */
+- if (received && (incomplete || context_next)) {
++ if (received && (!last || context_next)) {
+ rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
+ rdata->state_saved = 1;
+ rdata->state.skb = skb;
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
+Date: Wed, 22 Mar 2017 17:25:27 -0500
+Subject: amd-xgbe: Fix the ECC-related bit position definitions
+
+From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
+
+
+[ Upstream commit f43feef4e6acde10857fcbfdede790d6b3f2c71d ]
+
+The ECC bit positions that describe whether the ECC interrupt is for
+Tx, Rx or descriptor memory, and whether it is a single correctable
+or a double detected error, were defined incorrectly (in reversed order).
+Fix the bit position definitions for these settings so that the proper
+ECC handling is performed.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-common.h | 24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+@@ -984,29 +984,29 @@
+ #define XP_ECC_CNT1_DESC_DED_WIDTH 8
+ #define XP_ECC_CNT1_DESC_SEC_INDEX 0
+ #define XP_ECC_CNT1_DESC_SEC_WIDTH 8
+-#define XP_ECC_IER_DESC_DED_INDEX 0
++#define XP_ECC_IER_DESC_DED_INDEX 5
+ #define XP_ECC_IER_DESC_DED_WIDTH 1
+-#define XP_ECC_IER_DESC_SEC_INDEX 1
++#define XP_ECC_IER_DESC_SEC_INDEX 4
+ #define XP_ECC_IER_DESC_SEC_WIDTH 1
+-#define XP_ECC_IER_RX_DED_INDEX 2
++#define XP_ECC_IER_RX_DED_INDEX 3
+ #define XP_ECC_IER_RX_DED_WIDTH 1
+-#define XP_ECC_IER_RX_SEC_INDEX 3
++#define XP_ECC_IER_RX_SEC_INDEX 2
+ #define XP_ECC_IER_RX_SEC_WIDTH 1
+-#define XP_ECC_IER_TX_DED_INDEX 4
++#define XP_ECC_IER_TX_DED_INDEX 1
+ #define XP_ECC_IER_TX_DED_WIDTH 1
+-#define XP_ECC_IER_TX_SEC_INDEX 5
++#define XP_ECC_IER_TX_SEC_INDEX 0
+ #define XP_ECC_IER_TX_SEC_WIDTH 1
+-#define XP_ECC_ISR_DESC_DED_INDEX 0
++#define XP_ECC_ISR_DESC_DED_INDEX 5
+ #define XP_ECC_ISR_DESC_DED_WIDTH 1
+-#define XP_ECC_ISR_DESC_SEC_INDEX 1
++#define XP_ECC_ISR_DESC_SEC_INDEX 4
+ #define XP_ECC_ISR_DESC_SEC_WIDTH 1
+-#define XP_ECC_ISR_RX_DED_INDEX 2
++#define XP_ECC_ISR_RX_DED_INDEX 3
+ #define XP_ECC_ISR_RX_DED_WIDTH 1
+-#define XP_ECC_ISR_RX_SEC_INDEX 3
++#define XP_ECC_ISR_RX_SEC_INDEX 2
+ #define XP_ECC_ISR_RX_SEC_WIDTH 1
+-#define XP_ECC_ISR_TX_DED_INDEX 4
++#define XP_ECC_ISR_TX_DED_INDEX 1
+ #define XP_ECC_ISR_TX_DED_WIDTH 1
+-#define XP_ECC_ISR_TX_SEC_INDEX 5
++#define XP_ECC_ISR_TX_SEC_INDEX 0
+ #define XP_ECC_ISR_TX_SEC_WIDTH 1
+ #define XP_I2C_MUTEX_BUSY_INDEX 31
+ #define XP_I2C_MUTEX_BUSY_WIDTH 1
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Stanislaw Gruszka <sgruszka@redhat.com>
+Date: Wed, 22 Mar 2017 16:08:33 +0100
+Subject: genetlink: fix counting regression on ctrl_dumpfamily()
+
+From: Stanislaw Gruszka <sgruszka@redhat.com>
+
+
+[ Upstream commit 1d2a6a5e4bf2921531071fcff8538623dce74efa ]
+
+Commit 2ae0f17df1cd ("genetlink: use idr to track families") replaced
+
+        if (++n < fams_to_skip)
+                continue;
+
+into:
+
+        if (n++ < fams_to_skip)
+                continue;
+
+This subtle change means that on a retried ctrl_dumpfamily() call we
+omit the family that failed ctrl_fill_info() on the previous call,
+because the saved counter cb->args[0] = n also counts the family for
+which ctrl_fill_info() failed.
+
+Fix the problem, and avoid confusion in the future, by simply
+decrementing the n counter when ctrl_fill_info() fails.
+
+The user-visible problem caused by this bug is a failure to get access
+to some genetlink family, e.g. nl80211. However, the problem is
+reproducible only if the number of registered genetlink families is
+big enough to require a second call of ctrl_dumpfamily().
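+
+A tiny standalone counting loop shows the off-by-one; the family list
+and the fill step below are hypothetical stand-ins:
+
+    #include <stdio.h>
+
+    static int fill(int fam) { return fam == 3 ? -1 : 0; } /* no room */
+
+    int main(void)
+    {
+            int n = 0, skip = 0, fam;
+
+            for (fam = 0; fam < 6; fam++) {
+                    if (n++ < skip)
+                            continue;
+                    if (fill(fam) < 0) {
+                            n--;    /* the fix: don't count the family
+                                     * that failed to dump */
+                            break;
+                    }
+            }
+            /* Without n--, a retry with skip = n would omit family 3. */
+            printf("resume at n = %d\n", n);
+            return 0;
+    }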
+
+Cc: Xose Vazquez Perez <xose.vazquez@gmail.com>
+Cc: Larry Finger <Larry.Finger@lwfinger.net>
+Cc: Johannes Berg <johannes@sipsolutions.net>
+Fixes: 2ae0f17df1cd ("genetlink: use idr to track families")
+Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
+Acked-by: Johannes Berg <johannes@sipsolutions.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/genetlink.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/netlink/genetlink.c
++++ b/net/netlink/genetlink.c
+@@ -783,8 +783,10 @@ static int ctrl_dumpfamily(struct sk_buf
+
+ if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+- skb, CTRL_CMD_NEWFAMILY) < 0)
++ skb, CTRL_CMD_NEWFAMILY) < 0) {
++ n--;
+ break;
++ }
+ }
+
+ cb->args[0] = n;
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 21 Mar 2017 19:22:28 -0700
+Subject: ipv4: provide stronger user input validation in nl_fib_input()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c64c0b3cac4c5b8cb093727d2c19743ea3965c0b ]
+
+Alexander reported a KMSAN splat caused by reads of the uninitialized
+field (tb_id_in) from a user-provided struct fib_result_nl.
+
+It turns out the nl_fib_input() sanity tests on user input are a bit
+wrong:
+
+A user can pretend nlh->nlmsg_len is big enough, but provide a buffer
+that is too small at sendmsg() time.
+
+Reported-by: Alexander Potapenko <glider@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -1082,7 +1082,8 @@ static void nl_fib_input(struct sk_buff
+
+ net = sock_net(skb->sk);
+ nlh = nlmsg_hdr(skb);
+- if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
++ if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
++ skb->len < nlh->nlmsg_len ||
+ nlmsg_len(nlh) < sizeof(*frn))
+ return;
+
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Tue, 21 Mar 2017 17:14:27 +0100
+Subject: ipv6: make sure to initialize sockc.tsflags before first use
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit d515684d78148884d5fc425ba904c50f03844020 ]
+
+When udp_sk(sk)->pending is AF_INET6, udpv6_sendmsg() would
+jump to do_append_data, skipping the initialization of sockc.tsflags.
+Fix the problem by moving the sockc.tsflags initialization earlier.
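+
+The shape of the bug is "a goto that skips an initialization"; a
+minimal standalone sketch, with names that are illustrative rather
+than the kernel's:
+
+    #include <stdio.h>
+
+    static int send6(int pending)
+    {
+            int tsflags;            /* mirrors sockc.tsflags */
+
+            tsflags = 42;           /* the fix: initialize up front */
+
+            if (pending)
+                    goto do_append_data;    /* corked-send fast path */
+
+            /* before the fix, the only initialization lived here */
+
+    do_append_data:
+            return tsflags;         /* read on both paths */
+    }
+
+    int main(void)
+    {
+            printf("%d %d\n", send6(0), send6(1));
+            return 0;
+    }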
+
+The bug was detected with KMSAN.
+
+Fixes: c14ac9451c34 ("sock: enable timestamping using control messages")
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/udp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1022,6 +1022,7 @@ int udpv6_sendmsg(struct sock *sk, struc
+ ipc6.hlimit = -1;
+ ipc6.tclass = -1;
+ ipc6.dontfrag = -1;
++ sockc.tsflags = sk->sk_tsflags;
+
+ /* destination address check */
+ if (sin6) {
+@@ -1146,7 +1147,6 @@ do_udp_sendmsg:
+
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_uid = sk->sk_uid;
+- sockc.tsflags = sk->sk_tsflags;
+
+ if (msg->msg_controllen) {
+ opt = &opt_space;
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Wed, 15 Mar 2017 12:57:21 -0700
+Subject: net: bcmgenet: Do not suspend PHY if Wake-on-LAN is enabled
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 5371bbf4b295eea334ed453efa286afa2c3ccff3 ]
+
+Suspending the PHY would be putting it in a low power state where it
+may no longer allow us to do Wake-on-LAN.
+
+Fixes: cc013fb48898 ("net: bcmgenet: correctly suspend and resume PHY device")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -3395,7 +3395,8 @@ static int bcmgenet_suspend(struct devic
+
+ bcmgenet_netif_stop(dev);
+
+- phy_suspend(priv->phydev);
++ if (!device_may_wakeup(d))
++ phy_suspend(priv->phydev);
+
+ netif_device_detach(dev);
+
+@@ -3492,7 +3493,8 @@ static int bcmgenet_resume(struct device
+
+ netif_device_attach(dev);
+
+- phy_resume(priv->phydev);
++ if (!device_may_wakeup(d))
++ phy_resume(priv->phydev);
+
+ if (priv->eee.eee_enabled)
+ bcmgenet_eee_enable_set(dev, true);
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Doug Berger <opendmb@gmail.com>
+Date: Tue, 21 Mar 2017 14:01:06 -0700
+Subject: net: bcmgenet: remove bcmgenet_internal_phy_setup()
+
+From: Doug Berger <opendmb@gmail.com>
+
+
+[ Upstream commit 31739eae738ccbe8b9d627c3f2251017ca03f4d2 ]
+
+Commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset")
+removed the bcmgenet_mii_reset() function from bcmgenet_power_up() and
+bcmgenet_internal_phy_setup() functions. In so doing it broke the reset
+of the internal PHY devices used by the GENETv1-GENETv3 which required
+this reset before the UniMAC was enabled. It also broke the internal
+GPHY devices used by the GENETv4 because the config_init that installed
+the AFE workaround was no longer occurring after the reset of the GPHY
+performed by bcmgenet_phy_power_set() in bcmgenet_internal_phy_setup().
+In addition the code in bcmgenet_internal_phy_setup() related to the
+"enable APD" comment goes with the bcmgenet_mii_reset() so it should
+have also been removed.
+
+Commit bd4060a6108b ("net: bcmgenet: Power on integrated GPHY in
+bcmgenet_power_up()") moved the bcmgenet_phy_power_set() call to the
+bcmgenet_power_up() function, but failed to remove it from the
+bcmgenet_internal_phy_setup() function. Had it done so, the
+bcmgenet_internal_phy_setup() function would have been empty and could
+have been removed at that time.
+
+Commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on")
+was submitted to correct the functional problems introduced by
+commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset"). It
+was included in v4.4 and made available on 4.3-stable. Unfortunately,
+it didn't fully revert the commit because this bcmgenet_mii_reset()
+doesn't apply the soft reset to the internal GPHY used by GENETv4 like
+the previous one did. This prevents the restoration of the AFE work-
+arounds for internal GPHY devices after the bcmgenet_phy_power_set() in
+bcmgenet_internal_phy_setup().
+
+This commit takes the alternate approach of removing the unnecessary
+bcmgenet_internal_phy_setup() function which shouldn't have been in v4.3
+so that when bcmgenet_mii_reset() was restored it should have only gone
+into bcmgenet_power_up(). This will avoid the problems while also
+removing the redundancy (and hopefully some of the confusion).
+
+Fixes: 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmmii.c | 15 ---------------
+ 1 file changed, 15 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
+@@ -220,20 +220,6 @@ void bcmgenet_phy_power_set(struct net_d
+ udelay(60);
+ }
+
+-static void bcmgenet_internal_phy_setup(struct net_device *dev)
+-{
+- struct bcmgenet_priv *priv = netdev_priv(dev);
+- u32 reg;
+-
+- /* Power up PHY */
+- bcmgenet_phy_power_set(dev, true);
+- /* enable APD */
+- reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
+- reg |= EXT_PWR_DN_EN_LD;
+- bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
+- bcmgenet_mii_reset(dev);
+-}
+-
+ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
+ {
+ u32 reg;
+@@ -281,7 +267,6 @@ int bcmgenet_mii_config(struct net_devic
+
+ if (priv->internal_phy) {
+ phy_name = "internal PHY";
+- bcmgenet_internal_phy_setup(dev);
+ } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
+ phy_name = "MoCA";
+ bcmgenet_moca_phy_setup(priv);
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:12 +0200
+Subject: net/mlx5: Add missing entries for set/query rate limit commands
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit 1f30a86c58093046dc3e49c23d2618894e098f7a ]
+
+The switch cases for the rate limit set and query commands were
+missing, which could lead to wrong behavior under FW error or driver
+reset flow; fix that.
+
+Fixes: 1466cc5b23d1 ('net/mlx5: Rate limit tables support')
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -361,6 +361,8 @@ static int mlx5_internal_err_ret_value(s
+ case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
++ case MLX5_CMD_OP_SET_RATE_LIMIT:
++ case MLX5_CMD_OP_QUERY_RATE_LIMIT:
+ case MLX5_CMD_OP_ALLOC_PD:
+ case MLX5_CMD_OP_ALLOC_UAR:
+ case MLX5_CMD_OP_CONFIG_INT_MODERATION:
+@@ -497,6 +499,8 @@ const char *mlx5_command_str(int command
+ MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
++ MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
++ MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
+ MLX5_COMMAND_STR_CASE(ALLOC_PD);
+ MLX5_COMMAND_STR_CASE(DEALLOC_PD);
+ MLX5_COMMAND_STR_CASE(ALLOC_UAR);
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Roi Dayan <roid@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:14 +0200
+Subject: net/mlx5: E-Switch, Don't allow changing inline mode when flows are configured
+
+From: Roi Dayan <roid@mellanox.com>
+
+
+[ Upstream commit 375f51e2b5b7b9a42b3139aea519cbb1bfc5d6ef ]
+
+Changing the eswitch inline mode can potentially cause already configured
+flows not to match the policy. E.g. set policy L4, add some L4 rules,
+set policy to L2 --> bad! Hence we disallow it.
+
+Keep track of how many offloaded rules are now set and refuse
+inline mode changes if this isn't zero.
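+
+A minimal standalone model of the counter-guard pattern introduced
+here (the struct, names and mode values are illustrative):
+
+    #include <stdio.h>
+    #include <errno.h>
+
+    struct offloads {
+            unsigned long long num_flows;   /* ++ on add, -- on delete */
+            int inline_mode;
+    };
+
+    static int set_inline_mode(struct offloads *o, int mode)
+    {
+            if (o->num_flows > 0)
+                    return -EOPNOTSUPP;     /* rules exist: refuse */
+            o->inline_mode = mode;
+            return 0;
+    }
+
+    int main(void)
+    {
+            struct offloads o = { .num_flows = 1, .inline_mode = 4 };
+
+            printf("%d\n", set_inline_mode(&o, 2));  /* rejected */
+            o.num_flows = 0;
+            printf("%d\n", set_inline_mode(&o, 2));  /* 0: accepted */
+            return 0;
+    }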
+
+Fixes: bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode")
+Signed-off-by: Roi Dayan <roid@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 +
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 ++++++++
+ 2 files changed, 9 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -201,6 +201,7 @@ struct mlx5_esw_offload {
+ struct mlx5_eswitch_rep *vport_reps;
+ DECLARE_HASHTABLE(encap_tbl, 8);
+ u8 inline_mode;
++ u64 num_flows;
+ };
+
+ struct mlx5_eswitch {
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -93,6 +93,8 @@ mlx5_eswitch_add_offloaded_rule(struct m
+ spec, &flow_act, dest, i);
+ if (IS_ERR(rule))
+ mlx5_fc_destroy(esw->dev, counter);
++ else
++ esw->offloads.num_flows++;
+
+ return rule;
+ }
+@@ -108,6 +110,7 @@ mlx5_eswitch_del_offloaded_rule(struct m
+ counter = mlx5_flow_rule_counter(rule);
+ mlx5_del_flow_rules(rule);
+ mlx5_fc_destroy(esw->dev, counter);
++ esw->offloads.num_flows--;
+ }
+ }
+
+@@ -919,6 +922,11 @@ int mlx5_devlink_eswitch_inline_mode_set
+ MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+ return -EOPNOTSUPP;
+
++ if (esw->offloads.num_flows > 0) {
++ esw_warn(dev, "Can't set inline mode when flows are configured\n");
++ return -EOPNOTSUPP;
++ }
++
+ err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
+ if (err)
+ goto out;
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Maor Gottlieb <maorg@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:17 +0200
+Subject: net/mlx5: Increase number of max QPs in default profile
+
+From: Maor Gottlieb <maorg@mellanox.com>
+
+
+[ Upstream commit 5f40b4ed975c26016cf41953b7510fe90718e21c ]
+
+With ConnectX-4 sharing SRQs from the same space as QPs, we hit a
+limit preventing some applications from allocating the needed amount
+of QPs. Double the size to 256K.
+
+Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters')
+Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -87,7 +87,7 @@ static struct mlx5_profile profile[] = {
+ [2] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE |
+ MLX5_PROF_MASK_MR_CACHE,
+- .log_max_qp = 17,
++ .log_max_qp = 18,
+ .mr_cache[0] = {
+ .size = 500,
+ .limit = 250
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Paul Blakey <paulb@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:16 +0200
+Subject: net/mlx5e: Avoid supporting udp tunnel port ndo for VF reps
+
+From: Paul Blakey <paulb@mellanox.com>
+
+
+[ Upstream commit 1ad9a00ae0efc2e9337148d6c382fad3d27bf99a ]
+
+This was added to allow the TC offloading code to identify offloading
+encap/decap vxlan rules.
+
+The VF reps are effectively related to the same mlx5 PCI device as the
+PF. Since the kernel invokes the (say) delete ndo for each netdev, the
+FW erred on multiple vxlan dst port deletes when the port was deleted
+from the system.
+
+We fix that by having the registration carried out only by the PF.
+Since the PF serves as the uplink device, the VF reps will look up a
+port there and determine whether they are allowed to offload it.
+
+Tested:
+ <SETUP VFS>
+ <SETUP switchdev mode to have representors>
+ ip link add vxlan1 type vxlan id 44 dev ens5f0 dstport 9999
+ ip link set vxlan1 up
+ ip link del dev vxlan1
+
+Fixes: 4a25730eb202 ('net/mlx5e: Add ndo_udp_tunnel_add to VF representors')
+Signed-off-by: Paul Blakey <paulb@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ----
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++----
+ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 --
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 +++++++--
+ 4 files changed, 11 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -921,10 +921,6 @@ void mlx5e_destroy_netdev(struct mlx5_co
+ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
+ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
+ u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
+-void mlx5e_add_vxlan_port(struct net_device *netdev,
+- struct udp_tunnel_info *ti);
+-void mlx5e_del_vxlan_port(struct net_device *netdev,
+- struct udp_tunnel_info *ti);
+
+ int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -3055,8 +3055,8 @@ static int mlx5e_get_vf_stats(struct net
+ vf_stats);
+ }
+
+-void mlx5e_add_vxlan_port(struct net_device *netdev,
+- struct udp_tunnel_info *ti)
++static void mlx5e_add_vxlan_port(struct net_device *netdev,
++ struct udp_tunnel_info *ti)
+ {
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+@@ -3069,8 +3069,8 @@ void mlx5e_add_vxlan_port(struct net_dev
+ mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1);
+ }
+
+-void mlx5e_del_vxlan_port(struct net_device *netdev,
+- struct udp_tunnel_info *ti)
++static void mlx5e_del_vxlan_port(struct net_device *netdev,
++ struct udp_tunnel_info *ti)
+ {
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+@@ -394,8 +394,6 @@ static const struct net_device_ops mlx5e
+ .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
+ .ndo_setup_tc = mlx5e_rep_ndo_setup_tc,
+ .ndo_get_stats64 = mlx5e_rep_get_stats,
+- .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
+- .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
+ .ndo_has_offload_stats = mlx5e_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_get_offload_stats,
+ };
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -264,12 +264,15 @@ static int parse_tunnel_attr(struct mlx5
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ f->mask);
++ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
++ struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
++ struct mlx5e_priv *up_priv = netdev_priv(up_dev);
+
+ /* Full udp dst port must be given */
+ if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
+ goto vxlan_match_offload_err;
+
+- if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
++ if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) &&
+ MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
+ parse_vxlan_attr(spec, f);
+ else {
+@@ -827,6 +830,8 @@ static int mlx5e_attach_encap(struct mlx
+ struct mlx5_esw_flow_attr *attr)
+ {
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
++ struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
++ struct mlx5e_priv *up_priv = netdev_priv(up_dev);
+ unsigned short family = ip_tunnel_info_af(tun_info);
+ struct ip_tunnel_key *key = &tun_info->key;
+ struct mlx5_encap_info info;
+@@ -849,7 +854,7 @@ vxlan_encap_offload_err:
+ return -EOPNOTSUPP;
+ }
+
+- if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
++ if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) &&
+ MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
+ info.tp_dst = key->tp_dst;
+ info.tun_id = tunnel_id_to_key32(key->tun_id);
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:13 +0200
+Subject: net/mlx5e: Change the TC offload rule add/del code path to be per NIC or E-Switch
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit d85cdccbb3fe9a632ec9d0f4e4526c8c84fc3523 ]
+
+Refactor the code that deals with add/del of TC rules to have a handler per
+NIC/E-Switch offloading use case, and push the latter into the e-switch code.
+This provides better separation and will be used in a downstream patch for
+applying a fix.
+
+Fixes: bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode")
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reviewed-by: Roi Dayan <roid@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 57 ++++++++-----
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 14 +++
+ 3 files changed, 58 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -128,6 +128,23 @@ err_create_ft:
+ return rule;
+ }
+
++static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
++ struct mlx5e_tc_flow *flow)
++{
++ struct mlx5_fc *counter = NULL;
++
++ if (!IS_ERR(flow->rule)) {
++ counter = mlx5_flow_rule_counter(flow->rule);
++ mlx5_del_flow_rules(flow->rule);
++ mlx5_fc_destroy(priv->mdev, counter);
++ }
++
++ if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
++ mlx5_destroy_flow_table(priv->fs.tc.t);
++ priv->fs.tc.t = NULL;
++ }
++}
++
+ static struct mlx5_flow_handle *
+ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+@@ -144,7 +161,24 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv
+ }
+
+ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+- struct mlx5e_tc_flow *flow) {
++ struct mlx5e_tc_flow *flow);
++
++static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
++ struct mlx5e_tc_flow *flow)
++{
++ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
++
++ mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->attr);
++
++ mlx5_eswitch_del_vlan_action(esw, flow->attr);
++
++ if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
++ mlx5e_detach_encap(priv, flow);
++}
++
++static void mlx5e_detach_encap(struct mlx5e_priv *priv,
++ struct mlx5e_tc_flow *flow)
++{
+ struct list_head *next = flow->encap.next;
+
+ list_del(&flow->encap);
+@@ -169,24 +203,11 @@ static void mlx5e_tc_del_flow(struct mlx
+ struct mlx5e_tc_flow *flow)
+ {
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+- struct mlx5_fc *counter = NULL;
+
+- if (!IS_ERR(flow->rule)) {
+- counter = mlx5_flow_rule_counter(flow->rule);
+- mlx5_del_flow_rules(flow->rule);
+- mlx5_fc_destroy(priv->mdev, counter);
+- }
+-
+- if (esw && esw->mode == SRIOV_OFFLOADS) {
+- mlx5_eswitch_del_vlan_action(esw, flow->attr);
+- if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+- mlx5e_detach_encap(priv, flow);
+- }
+-
+- if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
+- mlx5_destroy_flow_table(priv->fs.tc.t);
+- priv->fs.tc.t = NULL;
+- }
++ if (esw && esw->mode == SRIOV_OFFLOADS)
++ mlx5e_tc_del_fdb_flow(priv, flow);
++ else
++ mlx5e_tc_del_nic_flow(priv, flow);
+ }
+
+ static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -263,6 +263,11 @@ struct mlx5_flow_handle *
+ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr);
++void
++mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
++ struct mlx5_flow_handle *rule,
++ struct mlx5_esw_flow_attr *attr);
++
+ struct mlx5_flow_handle *
+ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -97,6 +97,20 @@ mlx5_eswitch_add_offloaded_rule(struct m
+ return rule;
+ }
+
++void
++mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
++ struct mlx5_flow_handle *rule,
++ struct mlx5_esw_flow_attr *attr)
++{
++ struct mlx5_fc *counter = NULL;
++
++ if (!IS_ERR(rule)) {
++ counter = mlx5_flow_rule_counter(rule);
++ mlx5_del_flow_rules(rule);
++ mlx5_fc_destroy(esw->dev, counter);
++ }
++}
++
+ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
+ {
+ struct mlx5_eswitch_rep *rep;
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Gal Pressman <galp@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:18 +0200
+Subject: net/mlx5e: Count GSO packets correctly
+
+From: Gal Pressman <galp@mellanox.com>
+
+
+[ Upstream commit d3a4e4da54c7adb420d5f48e89be913b14bdeff1 ]
+
+TX packets statistics (the 'tx_packets' counter) used to count a GSO packet
+as one, even though it contains multiple segments.
+This patch will increment the counter by the number of segments, and
+align the driver with the behavior of other drivers in the stack.
+
+Note that no information is lost in this patch due to 'tx_tso_packets'
+counter existence.
+
+Before, ethtool showed:
+$ ethtool -S ens6 | egrep "tx_packets|tx_tso_packets"
+ tx_packets: 61340
+ tx_tso_packets: 60954
+ tx_packets_phy: 2451115
+
+Now, we will see the more logical statistics:
+$ ethtool -S ens6 | egrep "tx_packets|tx_tso_packets"
+ tx_packets: 2451115
+ tx_tso_packets: 60954
+ tx_packets_phy: 2451115
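+
+Illustrative arithmetic for one GSO skb (the MSS and header size are
+made up): a 65536-byte skb with 1448-byte segments and 54 bytes of
+headers now counts as 46 packets, and the byte counter includes the
+headers replicated into every segment:
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+            unsigned int skb_len = 65536, mss = 1448, ihs = 54;
+            unsigned int gso_segs = (skb_len - ihs + mss - 1) / mss;
+            unsigned int num_bytes = skb_len + (gso_segs - 1) * ihs;
+
+            /* stats.packets += gso_segs; stats.bytes += num_bytes; */
+            printf("segs=%u wire_bytes=%u\n", gso_segs, num_bytes);
+            return 0;
+    }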
+
+Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files")
+Signed-off-by: Gal Pressman <galp@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+@@ -272,15 +272,18 @@ static netdev_tx_t mlx5e_sq_xmit(struct
+ sq->stats.tso_bytes += skb->len - ihs;
+ }
+
++ sq->stats.packets += skb_shinfo(skb)->gso_segs;
+ num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
+ } else {
+ bf = sq->bf_budget &&
+ !skb->xmit_more &&
+ !skb_shinfo(skb)->nr_frags;
+ ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
++ sq->stats.packets++;
+ num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
+ }
+
++ sq->stats.bytes += num_bytes;
+ wi->num_bytes = num_bytes;
+
+ if (skb_vlan_tag_present(skb)) {
+@@ -377,8 +380,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct
+ if (bf)
+ sq->bf_budget--;
+
+- sq->stats.packets++;
+- sq->stats.bytes += num_bytes;
+ return NETDEV_TX_OK;
+
+ dma_unmap_wqe_err:
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Gal Pressman <galp@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:19 +0200
+Subject: net/mlx5e: Count LRO packets correctly
+
+From: Gal Pressman <galp@mellanox.com>
+
+
+[ Upstream commit 8ab7e2ae15d84ba758b2c8c6f4075722e9bd2a08 ]
+
+RX packets statistics (the 'rx_packets' counter) used to count an LRO packet
+as one, even though it contains multiple segments.
+This patch will increment the counter by the number of segments, and
+align the driver with the behavior of other drivers in the stack.
+
+Note that no information is lost in this patch due to 'rx_lro_packets'
+counter existence.
+
+Before, ethtool showed:
+$ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets"
+ rx_packets: 435277
+ rx_lro_packets: 35847
+ rx_packets_phy: 1935066
+
+Now, we will see the more logical statistics:
+$ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets"
+ rx_packets: 1935066
+ rx_lro_packets: 35847
+ rx_packets_phy: 1935066
+
+Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files")
+Signed-off-by: Gal Pressman <galp@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -603,6 +603,10 @@ static inline void mlx5e_build_rx_skb(st
+ if (lro_num_seg > 1) {
+ mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
+ skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
++ /* Subtract one since we already counted this as one
++ * "regular" packet in mlx5e_complete_rx_cqe()
++ */
++ rq->stats.packets += lro_num_seg - 1;
+ rq->stats.lro_packets++;
+ rq->stats.lro_bytes += cqe_bcnt;
+ }
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Tue, 21 Mar 2017 15:59:15 +0200
+Subject: net/mlx5e: Use the proper UAPI values when offloading TC vlan actions
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit 09c91ddf2cd33489c2c14edfef43ae38d412888e ]
+
+Currently we use the non-UAPI values, and we miss erring on
+the modify action, which is not supported; fix that.
+
+Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads')
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reported-by: Petr Machata <petrm@mellanox.com>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -974,14 +974,16 @@ static int parse_tc_fdb_actions(struct m
+ }
+
+ if (is_tcf_vlan(a)) {
+- if (tcf_vlan_action(a) == VLAN_F_POP) {
++ if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+- } else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
++ } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
+ if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
+ return -EOPNOTSUPP;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ attr->vlan = tcf_vlan_push_vid(a);
++ } else { /* action is TCA_VLAN_ACT_MODIFY */
++ return -EOPNOTSUPP;
+ }
+ continue;
+ }
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Wed, 15 Mar 2017 18:10:47 +0200
+Subject: net/openvswitch: Set the ipv6 source tunnel key address attribute correctly
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit 3d20f1f7bd575d147ffa75621fa560eea0aec690 ]
+
+When dealing with the ipv6 source tunnel key address attribute
+(OVS_TUNNEL_KEY_ATTR_IPV6_SRC), we are wrongly setting the tunnel
+dst ip; fix that.
+
+Fixes: 6b26ba3a7d95 ('openvswitch: netlink attributes for IPv6 tunneling')
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reported-by: Paul Blakey <paulb@mellanox.com>
+Acked-by: Jiri Benc <jbenc@redhat.com>
+Acked-by: Joe Stringer <joe@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/flow_netlink.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -588,7 +588,7 @@ static int ip_tun_from_nlattr(const stru
+ ipv4 = true;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
+- SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
++ SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
+ nla_get_in6_addr(a), is_mask);
+ ipv6 = true;
+ break;
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 15 Mar 2017 13:21:28 -0700
+Subject: net: properly release sk_frag.page
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 22a0e18eac7a9e986fec76c60fa4a2926d1291e2 ]
+
+I mistakenly added the code to release sk->sk_frag in
+sk_common_release() instead of sk_destruct()
+
+TCP sockets using sk->sk_allocation == GFP_ATOMIC do not call
+sk_common_release() at close time, thus leaking one (order-3) page.
+
+iSCSI is using such sockets.
+
+Fixes: 5640f7685831 ("net: use a per task frag allocator")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1444,6 +1444,11 @@ static void __sk_destruct(struct rcu_hea
+ pr_debug("%s: optmem leakage (%d bytes) detected\n",
+ __func__, atomic_read(&sk->sk_omem_alloc));
+
++ if (sk->sk_frag.page) {
++ put_page(sk->sk_frag.page);
++ sk->sk_frag.page = NULL;
++ }
++
+ if (sk->sk_peer_cred)
+ put_cred(sk->sk_peer_cred);
+ put_pid(sk->sk_peer_pid);
+@@ -2774,11 +2779,6 @@ void sk_common_release(struct sock *sk)
+
+ sk_refcnt_debug_release(sk);
+
+- if (sk->sk_frag.page) {
+- put_page(sk->sk_frag.page);
+- sk->sk_frag.page = NULL;
+- }
+-
+ sock_put(sk);
+ }
+ EXPORT_SYMBOL(sk_common_release);
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 16 Mar 2017 19:02:33 -0700
+Subject: net: solve a NAPI race
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+commit 39e6c8208d7b6fb9d2047850fb3327db567b564b upstream.
+
+While playing with mlx4 hardware timestamping of RX packets, I found
+that some packets were received by TCP stack with a ~200 ms delay...
+
+Since the timestamp was provided by the NIC, and my probe was added
+in tcp_v4_rcv() while in BH handler, I was confident it was not
+a sender issue, or a drop in the network.
+
+This would happen with a very low probability, but hurting RPC
+workloads.
+
+A NAPI driver normally arms the IRQ after the napi_complete_done(),
+after NAPI_STATE_SCHED is cleared, so that the hard irq handler can grab
+it.
+
+The problem is that if another point in the stack grabs the
+NAPI_STATE_SCHED bit while IRQs are not disabled, we might later have
+an IRQ firing and finding this bit set, right before
+napi_complete_done() clears it.
+
+This can happen with busy polling users, or if gro_flush_timeout is
+used. But some other uses of napi_schedule() in drivers can cause this
+as well.
+
+thread 1                                 thread 2 (could be on same cpu, or not)
+
+// busy polling or napi_watchdog()
+napi_schedule();
+...
+napi->poll()
+
+device polling:
+read 2 packets from ring buffer
+                                         Additional 3rd packet is
+                                         available.
+
+                                         device hard irq
+
+                                         // does nothing because
+                                         // NAPI_STATE_SCHED bit is owned
+                                         // by thread 1
+                                         napi_schedule();
+
+napi_complete_done(napi, 2);
+rearm_irq();
+
+Note that rearm_irq() will not force the device to send an additional
+IRQ for the packet it already signaled (3rd packet in my example).
+
+This patch adds a new NAPI_STATE_MISSED bit that napi_schedule_prep()
+can set if it could not grab NAPI_STATE_SCHED.
+
+Then napi_complete_done() properly reschedules the napi to make sure
+we do not miss something.
+
+Since we manipulate multiple bits at once, use cmpxchg() like in
+sk_busy_loop() to provide proper transactions.
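+
+The handshake can be modeled in userspace with GCC's __atomic
+builtins; a minimal sketch, where the two bit values mirror the patch
+and everything else is an illustrative harness:
+
+    #define STATE_SCHED  (1UL << 0)
+    #define STATE_MISSED (1UL << 1)
+
+    static unsigned long state;
+
+    /* Like napi_schedule_prep(): take SCHED, or record MISSED. */
+    static int schedule_prep(void)
+    {
+            unsigned long val, new;
+
+            do {
+                    val = __atomic_load_n(&state, __ATOMIC_RELAXED);
+                    new = val | STATE_SCHED;
+                    /* branchless: if (val & SCHED) new |= MISSED; */
+                    new |= (val & STATE_SCHED) / STATE_SCHED *
+                           STATE_MISSED;
+            } while (!__atomic_compare_exchange_n(&state, &val, new,
+                            0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED));
+            return !(val & STATE_SCHED);
+    }
+
+    /* Like napi_complete_done(): keep SCHED if a wakeup was missed. */
+    static int complete_done(void)
+    {
+            unsigned long val, new;
+
+            do {
+                    val = __atomic_load_n(&state, __ATOMIC_RELAXED);
+                    new = val & ~(STATE_MISSED | STATE_SCHED);
+                    new |= (val & STATE_MISSED) / STATE_MISSED *
+                           STATE_SCHED;
+            } while (!__atomic_compare_exchange_n(&state, &val, new,
+                            0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED));
+            return !(val & STATE_MISSED);   /* false: poll once more */
+    }
+
+    int main(void)
+    {
+            schedule_prep();        /* poller owns SCHED */
+            schedule_prep();        /* "irq" path: records MISSED */
+            return complete_done(); /* 0 here: SCHED kept, repoll */
+    }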
+
+In v2, I changed napi_watchdog() to use a relaxed variant of
+napi_schedule_prep() : No need to set NAPI_STATE_MISSED from this point.
+
+In v3, I added more details in the changelog and clears
+NAPI_STATE_MISSED in busy_poll_stop()
+
+In v4, I added the ideas given by Alexander Duyck in v3 review
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h | 29 +++++-----------
+ net/core/dev.c | 81 ++++++++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 83 insertions(+), 27 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -330,6 +330,7 @@ struct napi_struct {
+
+ enum {
+ NAPI_STATE_SCHED, /* Poll is scheduled */
++ NAPI_STATE_MISSED, /* reschedule a napi */
+ NAPI_STATE_DISABLE, /* Disable pending */
+ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
+ NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
+@@ -338,12 +339,13 @@ enum {
+ };
+
+ enum {
+- NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED),
+- NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE),
+- NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC),
+- NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED),
+- NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL),
+- NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL),
++ NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED),
++ NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED),
++ NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE),
++ NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC),
++ NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
++ NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
++ NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+ };
+
+ enum gro_result {
+@@ -413,20 +415,7 @@ static inline bool napi_disable_pending(
+ return test_bit(NAPI_STATE_DISABLE, &n->state);
+ }
+
+-/**
+- * napi_schedule_prep - check if NAPI can be scheduled
+- * @n: NAPI context
+- *
+- * Test if NAPI routine is already running, and if not mark
+- * it as running. This is used as a condition variable to
+- * insure only one NAPI poll instance runs. We also make
+- * sure there is no pending NAPI disable.
+- */
+-static inline bool napi_schedule_prep(struct napi_struct *n)
+-{
+- return !napi_disable_pending(n) &&
+- !test_and_set_bit(NAPI_STATE_SCHED, &n->state);
+-}
++bool napi_schedule_prep(struct napi_struct *n);
+
+ /**
+ * napi_schedule - schedule NAPI poll
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4913,6 +4913,39 @@ void __napi_schedule(struct napi_struct
+ EXPORT_SYMBOL(__napi_schedule);
+
+ /**
++ * napi_schedule_prep - check if napi can be scheduled
++ * @n: napi context
++ *
++ * Test if NAPI routine is already running, and if not mark
++ * it as running. This is used as a condition variable
++ * insure only one NAPI poll instance runs. We also make
++ * sure there is no pending NAPI disable.
++ */
++bool napi_schedule_prep(struct napi_struct *n)
++{
++ unsigned long val, new;
++
++ do {
++ val = READ_ONCE(n->state);
++ if (unlikely(val & NAPIF_STATE_DISABLE))
++ return false;
++ new = val | NAPIF_STATE_SCHED;
++
++ /* Sets STATE_MISSED bit if STATE_SCHED was already set
++ * This was suggested by Alexander Duyck, as compiler
++ * emits better code than :
++ * if (val & NAPIF_STATE_SCHED)
++ * new |= NAPIF_STATE_MISSED;
++ */
++ new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
++ NAPIF_STATE_MISSED;
++ } while (cmpxchg(&n->state, val, new) != val);
++
++ return !(val & NAPIF_STATE_SCHED);
++}
++EXPORT_SYMBOL(napi_schedule_prep);
++
++/**
+ * __napi_schedule_irqoff - schedule for receive
+ * @n: entry to schedule
+ *
+@@ -4943,7 +4976,7 @@ EXPORT_SYMBOL(__napi_complete);
+
+ bool napi_complete_done(struct napi_struct *n, int work_done)
+ {
+- unsigned long flags;
++ unsigned long flags, val, new;
+
+ /*
+ * 1) Don't let napi dequeue from the cpu poll list
+@@ -4967,14 +5000,33 @@ bool napi_complete_done(struct napi_stru
+ else
+ napi_gro_flush(n, false);
+ }
+- if (likely(list_empty(&n->poll_list))) {
+- WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+- } else {
++ if (unlikely(!list_empty(&n->poll_list))) {
+ /* If n->poll_list is not empty, we need to mask irqs */
+ local_irq_save(flags);
+- __napi_complete(n);
++ list_del_init(&n->poll_list);
+ local_irq_restore(flags);
+ }
++
++ do {
++ val = READ_ONCE(n->state);
++
++ WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
++
++ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
++
++ /* If STATE_MISSED was set, leave STATE_SCHED set,
++ * because we will call napi->poll() one more time.
++ * This C code was suggested by Alexander Duyck to help gcc.
++ */
++ new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
++ NAPIF_STATE_SCHED;
++ } while (cmpxchg(&n->state, val, new) != val);
++
++ if (unlikely(val & NAPIF_STATE_MISSED)) {
++ __napi_schedule(n);
++ return false;
++ }
++
+ return true;
+ }
+ EXPORT_SYMBOL(napi_complete_done);
+@@ -5000,6 +5052,16 @@ static void busy_poll_stop(struct napi_s
+ {
+ int rc;
+
++ /* Busy polling means there is a high chance device driver hard irq
++ * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
++ * set in napi_schedule_prep().
++ * Since we are about to call napi->poll() once more, we can safely
++ * clear NAPI_STATE_MISSED.
++ *
++ * Note: x86 could use a single "lock and ..." instruction
++ * to perform these two clear_bit()
++ */
++ clear_bit(NAPI_STATE_MISSED, &napi->state);
+ clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
+
+ local_bh_disable();
+@@ -5146,8 +5208,13 @@ static enum hrtimer_restart napi_watchdo
+ struct napi_struct *napi;
+
+ napi = container_of(timer, struct napi_struct, timer);
+- if (napi->gro_list)
+- napi_schedule(napi);
++
++ /* Note : we use a relaxed variant of napi_schedule_prep() not setting
++ * NAPI_STATE_MISSED, since we do not react to a device IRQ.
++ */
++ if (napi->gro_list && !napi_disable_pending(napi) &&
++ !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
++ __napi_schedule_irqoff(napi);
+
+ return HRTIMER_NORESTART;
+ }
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Andrey Ulanov <andreyu@google.com>
+Date: Tue, 14 Mar 2017 20:16:42 -0700
+Subject: net: unix: properly re-increment inflight counter of GC discarded candidates
+
+From: Andrey Ulanov <andreyu@google.com>
+
+
+[ Upstream commit 7df9c24625b9981779afb8fcdbe2bb4765e61147 ]
+
+Dmitry has reported that a BUG_ON() condition in unix_notinflight()
+may be triggered by simple code that forwards a unix socket in an
+SCM_RIGHTS message.
+That is caused by an incorrect unix socket GC implementation in unix_gc().
+
+The GC first collects a list of candidates, then (a) decrements their
+"children's" inflight counter, (b) checks which inflight counters are
+now 0, and then (c) increments all inflight counters back.
+(a) and (c) are done by calling scan_children() with inc_inflight or
+dec_inflight as the second argument.
+
+Commit 6209344f5a37 ("net: unix: fix inflight counting bug in garbage
+collector") changed scan_children() such that it no longer considers
+sockets that do not have the UNIX_GC_CANDIDATE flag. It also added a block
+of code that unsets this flag _before_ invoking
+scan_children(, dec_inflight, ). This may lead to incorrect inflight
+counters for some sockets.
+
+This change fixes this bug by changing the order of operations:
+UNIX_GC_CANDIDATE is now unset only after all inflight counters are
+restored to the original state.
+
+ kernel BUG at net/unix/garbage.c:149!
+ RIP: 0010:[<ffffffff8717ebf4>] [<ffffffff8717ebf4>]
+ unix_notinflight+0x3b4/0x490 net/unix/garbage.c:149
+ Call Trace:
+ [<ffffffff8716cfbf>] unix_detach_fds.isra.19+0xff/0x170 net/unix/af_unix.c:1487
+ [<ffffffff8716f6a9>] unix_destruct_scm+0xf9/0x210 net/unix/af_unix.c:1496
+ [<ffffffff86a90a01>] skb_release_head_state+0x101/0x200 net/core/skbuff.c:655
+ [<ffffffff86a9808a>] skb_release_all+0x1a/0x60 net/core/skbuff.c:668
+ [<ffffffff86a980ea>] __kfree_skb+0x1a/0x30 net/core/skbuff.c:684
+ [<ffffffff86a98284>] kfree_skb+0x184/0x570 net/core/skbuff.c:705
+ [<ffffffff871789d5>] unix_release_sock+0x5b5/0xbd0 net/unix/af_unix.c:559
+ [<ffffffff87179039>] unix_release+0x49/0x90 net/unix/af_unix.c:836
+ [<ffffffff86a694b2>] sock_release+0x92/0x1f0 net/socket.c:570
+ [<ffffffff86a6962b>] sock_close+0x1b/0x20 net/socket.c:1017
+ [<ffffffff81a76b8e>] __fput+0x34e/0x910 fs/file_table.c:208
+ [<ffffffff81a771da>] ____fput+0x1a/0x20 fs/file_table.c:244
+ [<ffffffff81483ab0>] task_work_run+0x1a0/0x280 kernel/task_work.c:116
+ [< inline >] exit_task_work include/linux/task_work.h:21
+ [<ffffffff8141287a>] do_exit+0x183a/0x2640 kernel/exit.c:828
+ [<ffffffff8141383e>] do_group_exit+0x14e/0x420 kernel/exit.c:931
+ [<ffffffff814429d3>] get_signal+0x663/0x1880 kernel/signal.c:2307
+ [<ffffffff81239b45>] do_signal+0xc5/0x2190 arch/x86/kernel/signal.c:807
+ [<ffffffff8100666a>] exit_to_usermode_loop+0x1ea/0x2d0
+ arch/x86/entry/common.c:156
+ [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190
+ [<ffffffff81009693>] syscall_return_slowpath+0x4d3/0x570
+ arch/x86/entry/common.c:259
+ [<ffffffff881478e6>] entry_SYSCALL_64_fastpath+0xc4/0xc6
+
+Link: https://lkml.org/lkml/2017/3/6/252
+Signed-off-by: Andrey Ulanov <andreyu@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Fixes: 6209344f5a37 ("net: unix: fix inflight counting bug in garbage collector")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/garbage.c | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
++ BUG_ON(!atomic_long_read(&u->inflight));
+ BUG_ON(list_empty(&u->link));
+
+ if (atomic_long_dec_and_test(&u->inflight))
+@@ -341,6 +342,14 @@ void unix_gc(void)
+ }
+ list_del(&cursor);
+
++ /* Now gc_candidates contains only garbage. Restore original
++ * inflight counters for these as well, and remove the skbuffs
++ * which are creating the cycle(s).
++ */
++ skb_queue_head_init(&hitlist);
++ list_for_each_entry(u, &gc_candidates, link)
++ scan_children(&u->sk, inc_inflight, &hitlist);
++
+ /* not_cycle_list contains those sockets which do not make up a
+ * cycle. Restore these to the inflight list.
+ */
+@@ -350,14 +359,6 @@ void unix_gc(void)
+ list_move_tail(&u->link, &gc_inflight_list);
+ }
+
+- /* Now gc_candidates contains only garbage. Restore original
+- * inflight counters for these as well, and remove the skbuffs
+- * which are creating the cycle(s).
+- */
+- skb_queue_head_init(&hitlist);
+- list_for_each_entry(u, &gc_candidates, link)
+- scan_children(&u->sk, inc_inflight, &hitlist);
+-
+ spin_unlock(&unix_gc_lock);
+
+ /* Here we are. Hitlist is filled. Die. */
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Fri, 17 Mar 2017 16:07:11 -0700
+Subject: net: vrf: Reset rt6i_idev in local dst after put
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit 3dc857f0e8fc22610a59cbb346ba62c6e921863f ]
+
+The VRF driver takes a reference to the inet6_dev on the VRF device for
+its rt6_local dst when handling local traffic through the VRF device as
+a loopback. When the device is deleted, the driver does a put on the idev
+but does not reset rt6i_idev in the rt6_info struct. When the dst is
+destroyed, dst_destroy calls ip6_dst_destroy, which does a second put for
+what is essentially the same reference, causing it to be prematurely freed.
+Reset rt6i_idev after the put in the vrf driver.
+
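+A self-contained userspace analogue of the put-then-clear rule (the
+struct and function names mirror the kernel ones, but the refcounting
+below is purely illustrative):
+
+  #include <assert.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+
+  struct idev { int refcnt; };
+
+  static void idev_put(struct idev *d)
+  {
+          assert(d->refcnt > 0);  /* a second put would trip this */
+          if (--d->refcnt == 0)
+                  free(d);
+  }
+
+  struct rt6_info { struct idev *rt6i_idev; };
+
+  /* runs later, as ip6_dst_destroy() does for the real dst */
+  static void dst_destroy(struct rt6_info *rt)
+  {
+          if (rt->rt6i_idev) {
+                  idev_put(rt->rt6i_idev);
+                  rt->rt6i_idev = NULL;
+          }
+  }
+
+  int main(void)
+  {
+          struct idev *d = calloc(1, sizeof(*d));
+          struct rt6_info rt = { .rt6i_idev = d };
+
+          d->refcnt = 1;
+
+          /* device-delete path: drop our reference... */
+          idev_put(rt.rt6i_idev);
+          /* ...and clear the pointer, or dst_destroy() would put the
+           * same reference a second time
+           */
+          rt.rt6i_idev = NULL;
+
+          dst_destroy(&rt);       /* safe: pointer is NULL */
+          puts("single put only");
+          return 0;
+  }
+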
+Fixes: b4869aa2f881e ("net: vrf: ipv6 support for local traffic to local addresses")
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vrf.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -462,8 +462,10 @@ static void vrf_rt6_release(struct net_d
+ }
+
+ if (rt6_local) {
+- if (rt6_local->rt6i_idev)
++ if (rt6_local->rt6i_idev) {
+ in6_dev_put(rt6_local->rt6i_idev);
++ rt6_local->rt6i_idev = NULL;
++ }
+
+ dst = &rt6_local->dst;
+ dev_put(dst->dev);
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Kris Murphy <kriskend@linux.vnet.ibm.com>
+Date: Thu, 16 Mar 2017 10:51:28 -0500
+Subject: openvswitch: Add missing case OVS_TUNNEL_KEY_ATTR_PAD
+
+From: Kris Murphy <kriskend@linux.vnet.ibm.com>
+
+
+[ Upstream commit 8f3dbfd79ed9ef9770305a7cc4e13dfd31ad2cd0 ]
+
+Added a case for OVS_TUNNEL_KEY_ATTR_PAD to the switch statement
+in ip_tun_from_nlattr in order to prevent the default case from
+returning an error.
+
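+An illustrative sketch of the pattern (not the actual
+ip_tun_from_nlattr() body): alignment-only PAD attributes carry no
+data and must be accepted and skipped, otherwise the default arm
+rejects an otherwise valid message.
+
+  enum {
+          TUN_ATTR_ID,
+          TUN_ATTR_VXLAN_OPTS,
+          TUN_ATTR_PAD,           /* exists only for 64-bit alignment */
+  };
+
+  static int parse_tun_attr(int type)
+  {
+          switch (type) {
+          case TUN_ATTR_ID:
+                  /* ... handle tunnel id ... */
+                  break;
+          case TUN_ATTR_VXLAN_OPTS:
+                  /* ... handle vxlan options ... */
+                  break;
+          case TUN_ATTR_PAD:
+                  break;          /* padding: silently ignore */
+          default:
+                  return -1;      /* genuinely unknown attribute */
+          }
+          return 0;
+  }
+
+  int main(void)
+  {
+          return parse_tun_attr(TUN_ATTR_PAD);    /* 0: accepted */
+  }
+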
+Fixes: b46f6ded906e ("libnl: nla_put_be64(): align on a 64-bit area")
+Signed-off-by: Kris Murphy <kriskend@linux.vnet.ibm.com>
+Acked-by: Joe Stringer <joe@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/flow_netlink.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -649,6 +649,8 @@ static int ip_tun_from_nlattr(const stru
+ tun_flags |= TUNNEL_VXLAN_OPT;
+ opts_type = type;
+ break;
++ case OVS_TUNNEL_KEY_ATTR_PAD:
++ break;
+ default:
+ OVS_NLERR(log, "Unknown IP tunnel attribute %d",
+ type);
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Bjørn Mork <bjorn@mork.no>
+Date: Fri, 17 Mar 2017 17:20:48 +0100
+Subject: qmi_wwan: add Dell DW5811e
+
+From: Bjørn Mork <bjorn@mork.no>
+
+
+[ Upstream commit 6bd845d1cf98b45c634baacb8381436dad3c2dd0 ]
+
+This is a Dell-branded Sierra Wireless EM7455. It operates in
+MBIM mode by default but can be configured to provide two QMI/RMNET
+functions.
+
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -924,6 +924,8 @@ static const struct usb_device_id produc
+ {QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
+ {QMI_FIXED_INTF(0x413c, 0x81b1, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
+ {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
++ {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */
++ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */
+ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
+ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
+ {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */
--- /dev/null
+net-openvswitch-set-the-ipv6-source-tunnel-key-address-attribute-correctly.patch
+net-bcmgenet-do-not-suspend-phy-if-wake-on-lan-is-enabled.patch
+net-properly-release-sk_frag.page.patch
+amd-xgbe-fix-jumbo-mtu-processing-on-newer-hardware.patch
+openvswitch-add-missing-case-ovs_tunnel_key_attr_pad.patch
+net-unix-properly-re-increment-inflight-counter-of-gc-discarded-candidates.patch
+qmi_wwan-add-dell-dw5811e.patch
+net-vrf-reset-rt6i_idev-in-local-dst-after-put.patch
+net-mlx5-add-missing-entries-for-set-query-rate-limit-commands.patch
+net-mlx5e-change-the-tc-offload-rule-add-del-code-path-to-be-per-nic-or-e-switch.patch
+net-mlx5-e-switch-don-t-allow-changing-inline-mode-when-flows-are-configured.patch
+net-mlx5e-use-the-proper-uapi-values-when-offloading-tc-vlan-actions.patch
+net-mlx5e-avoid-supporting-udp-tunnel-port-ndo-for-vf-reps.patch
+net-mlx5-increase-number-of-max-qps-in-default-profile.patch
+net-mlx5e-count-gso-packets-correctly.patch
+net-mlx5e-count-lro-packets-correctly.patch
+ipv6-make-sure-to-initialize-sockc.tsflags-before-first-use.patch
+net-bcmgenet-remove-bcmgenet_internal_phy_setup.patch
+ipv4-provide-stronger-user-input-validation-in-nl_fib_input.patch
+socket-bpf-fix-sk_filter-use-after-free-in-sk_clone_lock.patch
+genetlink-fix-counting-regression-on-ctrl_dumpfamily.patch
+tcp-initialize-icsk_ack.lrcvtime-at-session-start-time.patch
+amd-xgbe-fix-the-ecc-related-bit-position-definitions.patch
+net-solve-a-napi-race.patch
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 22 Mar 2017 13:08:08 +0100
+Subject: socket, bpf: fix sk_filter use after free in sk_clone_lock
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit a97e50cc4cb67e1e7bff56f6b41cda62ca832336 ]
+
+In sk_clone_lock(), we create a new socket and inherit most of the
+parent's members via sock_copy() which memcpy()'s various sections.
+Now, if the parent socket had a BPF socket filter attached, then
+newsk->sk_filter points to the same instance as the original
+sk->sk_filter.
+
+sk_filter_charge() is then called on the newsk->sk_filter to take a
+reference and should that fail due to hitting max optmem, we bail
+out and release the newsk instance.
+
+The issue is that commit 278571baca2a ("net: filter: simplify socket
+charging") wrongly combined the dismantle path with the failure path
+of xfrm_sk_clone_policy(). This means, even when charging failed, we
+call sk_free_unlock_clone() on the newsk, which then still points to
+the same sk_filter as the original sk.
+
+Thus, sk_free_unlock_clone() calls into __sk_destruct() eventually
+where it tests for present sk_filter and calls sk_filter_uncharge()
+on it, which potentially lets sk_omem_alloc wrap around and releases
+the eBPF prog and sk_filter structure from the (still intact) parent.
+
+Fix it by making sure that when sk_filter_charge() failed, we reset
+newsk->sk_filter back to NULL before passing to sk_free_unlock_clone(),
+so that we don't mess with the parent's sk_filter.
+
+Only if xfrm_sk_clone_policy() fails do we reach the point where
+either the parent's filter was NULL (and as a result newsk's as well)
+or a previous sk_filter_charge() succeeded; in that case we do need
+sk_filter_uncharge() to release the reference taken on sk_filter.
+
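+A userspace analogue of the fix (hypothetical types and helpers; the
+kernel path goes through sk_filter_charge(), sk_free_unlock_clone()
+and __sk_destruct()):
+
+  #include <assert.h>
+  #include <stdlib.h>
+  #include <string.h>
+
+  struct filter { int refcnt; };
+  struct sock { struct filter *sk_filter; };
+
+  static int filter_charge(struct filter *f)
+  {
+          (void)f;
+          return 0;       /* pretend we hit the optmem limit: no ref taken */
+  }
+
+  static void filter_uncharge(struct filter *f)
+  {
+          assert(f->refcnt > 0);
+          if (--f->refcnt == 0)
+                  free(f);
+  }
+
+  static void sk_destruct(struct sock *sk)
+  {
+          if (sk->sk_filter)
+                  filter_uncharge(sk->sk_filter);
+          free(sk);
+  }
+
+  int main(void)
+  {
+          struct filter *f = calloc(1, sizeof(*f));
+          struct sock parent = { .sk_filter = f };
+          struct sock *newsk = malloc(sizeof(*newsk));
+
+          f->refcnt = 1;                          /* parent's reference */
+          memcpy(newsk, &parent, sizeof(parent)); /* clone shares sk_filter */
+
+          if (!filter_charge(newsk->sk_filter)) {
+                  /* the fix: forget the shared filter before freeing
+                   * the clone, so its destructor cannot drop the
+                   * parent's only reference
+                   */
+                  newsk->sk_filter = NULL;
+                  sk_destruct(newsk);
+          }
+
+          filter_uncharge(parent.sk_filter);      /* parent still intact */
+          return 0;
+  }
+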
+Fixes: 278571baca2a ("net: filter: simplify socket charging")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1545,6 +1545,12 @@ struct sock *sk_clone_lock(const struct
+ is_charged = sk_filter_charge(newsk, filter);
+
+ if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
++ /* We need to make sure that we don't uncharge the new
++ * socket if we couldn't charge it in the first place
++ * as otherwise we uncharge the parent's filter.
++ */
++ if (!is_charged)
++ RCU_INIT_POINTER(newsk->sk_filter, NULL);
+ /* It is still raw copy of parent, so invalidate
+ * destructor and make plain sk_free() */
+ newsk->sk_destruct = NULL;
--- /dev/null
+From foo@baz Mon Mar 27 18:18:08 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 22 Mar 2017 08:10:21 -0700
+Subject: tcp: initialize icsk_ack.lrcvtime at session start time
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 15bb7745e94a665caf42bfaabf0ce062845b533b ]
+
+icsk_ack.lrcvtime has a 0 value at socket creation time.
+
+tcpi_last_data_recv can have a bogus value if no payload is ever received.
+
+This patch initializes icsk_ack.lrcvtime for active sessions
+in tcp_finish_connect() and for passive sessions in
+tcp_create_openreq_child().
+
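+To see why the zero value is bogus: tcp_get_info() reports the field
+as the elapsed time since lrcvtime, so a never-initialized 0 yields
+roughly the full uptime. A toy demonstration (units and HZ simplified,
+purely illustrative):
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned int now = 5 * 60 * 1000;  /* 5 min of "uptime", ms */
+          unsigned int lrcvtime = 0;         /* never initialized     */
+
+          /* tcp_get_info()-style computation, simplified */
+          printf("tcpi_last_data_recv = %u ms\n", now - lrcvtime);
+          /* prints 300000 ms for a brand-new connection */
+          return 0;
+  }
+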
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ net/ipv4/tcp_minisocks.c | 1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5571,6 +5571,7 @@ void tcp_finish_connect(struct sock *sk,
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ tcp_set_state(sk, TCP_ESTABLISHED);
++ icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+
+ if (skb) {
+ icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
+@@ -5789,7 +5790,6 @@ static int tcp_rcv_synsent_state_process
+ * to stand against the temptation 8) --ANK
+ */
+ inet_csk_schedule_ack(sk);
+- icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+ tcp_enter_quickack_mode(sk);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ TCP_DELACK_MAX, TCP_RTO_MAX);
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -466,6 +466,7 @@ struct sock *tcp_create_openreq_child(co
+ newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
+ minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
+ newicsk->icsk_rto = TCP_TIMEOUT_INIT;
++ newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
+
+ newtp->packets_out = 0;
+ newtp->retrans_out = 0;