--- /dev/null
+From 40ba35e74fa56866918d2f3bc0528b5b92725d5e Mon Sep 17 00:00:00 2001
+From: willy tarreau <w@1wt.eu>
+Date: Thu, 16 Jan 2014 08:20:10 +0100
+Subject: net: mvneta: add missing bit descriptions for interrupt masks and causes
+
+From: willy tarreau <w@1wt.eu>
+
+commit 40ba35e74fa56866918d2f3bc0528b5b92725d5e upstream.
+
+Marvell has not published the chip's datasheet yet, so it's very hard
+to find the relevant bits to manipulate to change the IRQ behaviour.
+Fortunately, these bits are described in the proprietary LSP patch set
+which is publicly available here :
+
+ http://www.plugcomputer.org/downloads/mirabox/
+
+So let's put them back in the driver in order to reduce the burden of
+current and future maintenance.
+
+Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Tested-by: Arnaud Ebalard <arno@natisbad.org>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvneta.c | 44 ++++++++++++++++++++++++++++++++--
+ 1 file changed, 42 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -99,16 +99,56 @@
+ #define MVNETA_CPU_RXQ_ACCESS_ALL_MASK 0x000000ff
+ #define MVNETA_CPU_TXQ_ACCESS_ALL_MASK 0x0000ff00
+ #define MVNETA_RXQ_TIME_COAL_REG(q) (0x2580 + ((q) << 2))
++
++/* Exception Interrupt Port/Queue Cause register */
++
+ #define MVNETA_INTR_NEW_CAUSE 0x25a0
+-#define MVNETA_RX_INTR_MASK(nr_rxqs) (((1 << nr_rxqs) - 1) << 8)
+ #define MVNETA_INTR_NEW_MASK 0x25a4
++
++/* bits 0..7 = TXQ SENT, one bit per queue.
++ * bits 8..15 = RXQ OCCUP, one bit per queue.
++ * bits 16..23 = RXQ FREE, one bit per queue.
++ * bit 29 = OLD_REG_SUM, see old reg ?
++ * bit 30 = TX_ERR_SUM, one bit for 4 ports
++ * bit 31 = MISC_SUM, one bit for 4 ports
++ */
++#define MVNETA_TX_INTR_MASK(nr_txqs) (((1 << nr_txqs) - 1) << 0)
++#define MVNETA_TX_INTR_MASK_ALL (0xff << 0)
++#define MVNETA_RX_INTR_MASK(nr_rxqs) (((1 << nr_rxqs) - 1) << 8)
++#define MVNETA_RX_INTR_MASK_ALL (0xff << 8)
++
+ #define MVNETA_INTR_OLD_CAUSE 0x25a8
+ #define MVNETA_INTR_OLD_MASK 0x25ac
++
++/* Data Path Port/Queue Cause Register */
+ #define MVNETA_INTR_MISC_CAUSE 0x25b0
+ #define MVNETA_INTR_MISC_MASK 0x25b4
++
++#define MVNETA_CAUSE_PHY_STATUS_CHANGE BIT(0)
++#define MVNETA_CAUSE_LINK_CHANGE BIT(1)
++#define MVNETA_CAUSE_PTP BIT(4)
++
++#define MVNETA_CAUSE_INTERNAL_ADDR_ERR BIT(7)
++#define MVNETA_CAUSE_RX_OVERRUN BIT(8)
++#define MVNETA_CAUSE_RX_CRC_ERROR BIT(9)
++#define MVNETA_CAUSE_RX_LARGE_PKT BIT(10)
++#define MVNETA_CAUSE_TX_UNDERUN BIT(11)
++#define MVNETA_CAUSE_PRBS_ERR BIT(12)
++#define MVNETA_CAUSE_PSC_SYNC_CHANGE BIT(13)
++#define MVNETA_CAUSE_SERDES_SYNC_ERR BIT(14)
++
++#define MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT 16
++#define MVNETA_CAUSE_BMU_ALLOC_ERR_ALL_MASK (0xF << MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT)
++#define MVNETA_CAUSE_BMU_ALLOC_ERR_MASK(pool) (1 << (MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT + (pool)))
++
++#define MVNETA_CAUSE_TXQ_ERROR_SHIFT 24
++#define MVNETA_CAUSE_TXQ_ERROR_ALL_MASK (0xFF << MVNETA_CAUSE_TXQ_ERROR_SHIFT)
++#define MVNETA_CAUSE_TXQ_ERROR_MASK(q) (1 << (MVNETA_CAUSE_TXQ_ERROR_SHIFT + (q)))
++
+ #define MVNETA_INTR_ENABLE 0x25b8
+ #define MVNETA_TXQ_INTR_ENABLE_ALL_MASK 0x0000ff00
+-#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000
++#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000 // note: neta says it's 0x000000FF
++
+ #define MVNETA_RXQ_CMD 0x2680
+ #define MVNETA_RXQ_DISABLE_SHIFT 8
+ #define MVNETA_RXQ_ENABLE_MASK 0x000000ff
--- /dev/null
+From 290213667ab53a95456397763205e4b1e30f46b5 Mon Sep 17 00:00:00 2001
+From: willy tarreau <w@1wt.eu>
+Date: Thu, 16 Jan 2014 08:20:09 +0100
+Subject: net: mvneta: do not schedule in mvneta_tx_timeout
+
+From: willy tarreau <w@1wt.eu>
+
+commit 290213667ab53a95456397763205e4b1e30f46b5 upstream.
+
+If a queue timeout is reported, we can oops because of some
+schedules while the caller is atomic, as shown below :
+
+ mvneta d0070000.ethernet eth0: tx timeout
+ BUG: scheduling while atomic: bash/1528/0x00000100
+ Modules linked in: slhttp_ethdiv(C) [last unloaded: slhttp_ethdiv]
+ CPU: 2 PID: 1528 Comm: bash Tainted: G WC 3.13.0-rc4-mvebu-nf #180
+ [<c0011bd9>] (unwind_backtrace+0x1/0x98) from [<c000f1ab>] (show_stack+0xb/0xc)
+ [<c000f1ab>] (show_stack+0xb/0xc) from [<c02ad323>] (dump_stack+0x4f/0x64)
+ [<c02ad323>] (dump_stack+0x4f/0x64) from [<c02abe67>] (__schedule_bug+0x37/0x4c)
+ [<c02abe67>] (__schedule_bug+0x37/0x4c) from [<c02ae261>] (__schedule+0x325/0x3ec)
+ [<c02ae261>] (__schedule+0x325/0x3ec) from [<c02adb97>] (schedule_timeout+0xb7/0x118)
+ [<c02adb97>] (schedule_timeout+0xb7/0x118) from [<c0020a67>] (msleep+0xf/0x14)
+ [<c0020a67>] (msleep+0xf/0x14) from [<c01dcbe5>] (mvneta_stop_dev+0x21/0x194)
+ [<c01dcbe5>] (mvneta_stop_dev+0x21/0x194) from [<c01dcfe9>] (mvneta_tx_timeout+0x19/0x24)
+ [<c01dcfe9>] (mvneta_tx_timeout+0x19/0x24) from [<c024afc7>] (dev_watchdog+0x18b/0x1c4)
+ [<c024afc7>] (dev_watchdog+0x18b/0x1c4) from [<c0020b53>] (call_timer_fn.isra.27+0x17/0x5c)
+ [<c0020b53>] (call_timer_fn.isra.27+0x17/0x5c) from [<c0020cad>] (run_timer_softirq+0x115/0x170)
+ [<c0020cad>] (run_timer_softirq+0x115/0x170) from [<c001ccb9>] (__do_softirq+0xbd/0x1a8)
+ [<c001ccb9>] (__do_softirq+0xbd/0x1a8) from [<c001cfad>] (irq_exit+0x61/0x98)
+ [<c001cfad>] (irq_exit+0x61/0x98) from [<c000d4bf>] (handle_IRQ+0x27/0x60)
+ [<c000d4bf>] (handle_IRQ+0x27/0x60) from [<c000843b>] (armada_370_xp_handle_irq+0x33/0xc8)
+ [<c000843b>] (armada_370_xp_handle_irq+0x33/0xc8) from [<c000fba9>] (__irq_usr+0x49/0x60)
+
+Ben Hutchings proposed a better fix that uses a scheduled work item for
+this, but while it fixed this panic, it caused other random freezes and
+panics, proving that the reset sequence in the driver is unreliable and
+that additional fixes should be investigated.
+
+When sending multiple streams over a link limited to 100 Mbps, Tx timeouts
+happen from time to time, and the driver correctly recovers only when the
+mvneta_tx_timeout() callback is disabled.
+
+Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Tested-by: Arnaud Ebalard <arno@natisbad.org>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvneta.c | 11 -----------
+ 1 file changed, 11 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -2207,16 +2207,6 @@ static void mvneta_stop_dev(struct mvnet
+ mvneta_rx_reset(pp);
+ }
+
+-/* tx timeout callback - display a message and stop/start the network device */
+-static void mvneta_tx_timeout(struct net_device *dev)
+-{
+- struct mvneta_port *pp = netdev_priv(dev);
+-
+- netdev_info(dev, "tx timeout\n");
+- mvneta_stop_dev(pp);
+- mvneta_start_dev(pp);
+-}
+-
+ /* Return positive if MTU is valid */
+ static int mvneta_check_mtu_valid(struct net_device *dev, int mtu)
+ {
+@@ -2567,7 +2557,6 @@ static const struct net_device_ops mvnet
+ .ndo_set_rx_mode = mvneta_set_rx_mode,
+ .ndo_set_mac_address = mvneta_set_mac_addr,
+ .ndo_change_mtu = mvneta_change_mtu,
+- .ndo_tx_timeout = mvneta_tx_timeout,
+ .ndo_get_stats64 = mvneta_get_stats64,
+ };
+
--- /dev/null
+From 71f6d1b31fb1f278a345a30a2180515adc7d80ae Mon Sep 17 00:00:00 2001
+From: willy tarreau <w@1wt.eu>
+Date: Thu, 16 Jan 2014 08:20:11 +0100
+Subject: net: mvneta: replace Tx timer with a real interrupt
+
+From: willy tarreau <w@1wt.eu>
+
+commit 71f6d1b31fb1f278a345a30a2180515adc7d80ae upstream.
+
+Right now the mvneta driver doesn't handle Tx IRQ, and relies on two
+mechanisms to flush Tx descriptors : a flush at the end of mvneta_tx()
+and a timer. If a burst of packets is emitted faster than the device
+can send them, then the queue is stopped until next wake-up of the
+timer 10ms later. This causes jerky output traffic with bursts and
+pauses, making it difficult to reach line rate with very few streams.
+
+A test on UDP traffic shows that it's not possible to go beyond 134
+Mbps / 12 kpps of outgoing traffic with 1500-bytes IP packets. Routed
+traffic tends to observe pauses as well if the traffic is bursty,
+making it even burstier after the wake-up.
+
+It seems that this feature was inherited from the original driver but
+nothing there mentions any reason for not using the interrupt instead,
+which the chip supports.
+
+Thus, this patch enables Tx interrupts and removes the timer. It does
+the two at once because it's not really possible to make the two
+mechanisms coexist, so a split patch doesn't make sense.
+
+First tests performed on a Mirabox (Armada 370) show that less CPU
+seems to be used when sending traffic. One reason might be that we now
+call mvneta_tx_done_gbe() with a mask indicating which queues have
+been done instead of looping over all of them.
+
+The same UDP test above now happily reaches 987 Mbps / 87.7 kpps.
+Single-stream TCP traffic can now more easily reach line rate. HTTP
+transfers of 1 MB objects over a single connection went from 730 to
+840 Mbps. It is even possible to go significantly higher (>900 Mbps)
+by tweaking tcp_tso_win_divisor.
+
+Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Cc: Arnaud Ebalard <arno@natisbad.org>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Tested-by: Arnaud Ebalard <arno@natisbad.org>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvneta.c | 72 +++++-----------------------------
+ 1 file changed, 12 insertions(+), 60 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -214,9 +214,6 @@
+ #define MVNETA_RX_COAL_PKTS 32
+ #define MVNETA_RX_COAL_USEC 100
+
+-/* Timer */
+-#define MVNETA_TX_DONE_TIMER_PERIOD 10
+-
+ /* Napi polling weight */
+ #define MVNETA_RX_POLL_WEIGHT 64
+
+@@ -272,16 +269,11 @@ struct mvneta_port {
+ void __iomem *base;
+ struct mvneta_rx_queue *rxqs;
+ struct mvneta_tx_queue *txqs;
+- struct timer_list tx_done_timer;
+ struct net_device *dev;
+
+ u32 cause_rx_tx;
+ struct napi_struct napi;
+
+- /* Flags */
+- unsigned long flags;
+-#define MVNETA_F_TX_DONE_TIMER_BIT 0
+-
+ /* Napi weight */
+ int weight;
+
+@@ -1112,17 +1104,6 @@ static void mvneta_tx_done_pkts_coal_set
+ txq->done_pkts_coal = value;
+ }
+
+-/* Trigger tx done timer in MVNETA_TX_DONE_TIMER_PERIOD msecs */
+-static void mvneta_add_tx_done_timer(struct mvneta_port *pp)
+-{
+- if (test_and_set_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags) == 0) {
+- pp->tx_done_timer.expires = jiffies +
+- msecs_to_jiffies(MVNETA_TX_DONE_TIMER_PERIOD);
+- add_timer(&pp->tx_done_timer);
+- }
+-}
+-
+-
+ /* Handle rx descriptor fill by setting buf_cookie and buf_phys_addr */
+ static void mvneta_rx_desc_fill(struct mvneta_rx_desc *rx_desc,
+ u32 phys_addr, u32 cookie)
+@@ -1614,15 +1595,6 @@ out:
+ dev_kfree_skb_any(skb);
+ }
+
+- if (txq->count >= MVNETA_TXDONE_COAL_PKTS)
+- mvneta_txq_done(pp, txq);
+-
+- /* If after calling mvneta_txq_done, count equals
+- * frags, we need to set the timer
+- */
+- if (txq->count == frags && frags > 0)
+- mvneta_add_tx_done_timer(pp);
+-
+ return NETDEV_TX_OK;
+ }
+
+@@ -1898,14 +1870,22 @@ static int mvneta_poll(struct napi_struc
+
+ /* Read cause register */
+ cause_rx_tx = mvreg_read(pp, MVNETA_INTR_NEW_CAUSE) &
+- MVNETA_RX_INTR_MASK(rxq_number);
++ (MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number));
++
++ /* Release Tx descriptors */
++ if (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL) {
++ int tx_todo = 0;
++
++ mvneta_tx_done_gbe(pp, (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL), &tx_todo);
++ cause_rx_tx &= ~MVNETA_TX_INTR_MASK_ALL;
++ }
+
+ /* For the case where the last mvneta_poll did not process all
+ * RX packets
+ */
+ cause_rx_tx |= pp->cause_rx_tx;
+ if (rxq_number > 1) {
+- while ((cause_rx_tx != 0) && (budget > 0)) {
++ while ((cause_rx_tx & MVNETA_RX_INTR_MASK_ALL) && (budget > 0)) {
+ int count;
+ struct mvneta_rx_queue *rxq;
+ /* get rx queue number from cause_rx_tx */
+@@ -1937,7 +1917,7 @@ static int mvneta_poll(struct napi_struc
+ napi_complete(napi);
+ local_irq_save(flags);
+ mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+- MVNETA_RX_INTR_MASK(rxq_number));
++ MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number));
+ local_irq_restore(flags);
+ }
+
+@@ -1945,26 +1925,6 @@ static int mvneta_poll(struct napi_struc
+ return rx_done;
+ }
+
+-/* tx done timer callback */
+-static void mvneta_tx_done_timer_callback(unsigned long data)
+-{
+- struct net_device *dev = (struct net_device *)data;
+- struct mvneta_port *pp = netdev_priv(dev);
+- int tx_done = 0, tx_todo = 0;
+-
+- if (!netif_running(dev))
+- return ;
+-
+- clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
+-
+- tx_done = mvneta_tx_done_gbe(pp,
+- (((1 << txq_number) - 1) &
+- MVNETA_CAUSE_TXQ_SENT_DESC_ALL_MASK),
+- &tx_todo);
+- if (tx_todo > 0)
+- mvneta_add_tx_done_timer(pp);
+-}
+-
+ /* Handle rxq fill: allocates rxq skbs; called when initializing a port */
+ static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+ int num)
+@@ -2214,7 +2174,7 @@ static void mvneta_start_dev(struct mvne
+
+ /* Unmask interrupts */
+ mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+- MVNETA_RX_INTR_MASK(rxq_number));
++ MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number));
+
+ phy_start(pp->phy_dev);
+ netif_tx_start_all_queues(pp->dev);
+@@ -2475,8 +2435,6 @@ static int mvneta_stop(struct net_device
+ free_irq(dev->irq, pp);
+ mvneta_cleanup_rxqs(pp);
+ mvneta_cleanup_txqs(pp);
+- del_timer(&pp->tx_done_timer);
+- clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
+
+ return 0;
+ }
+@@ -2777,10 +2735,6 @@ static int mvneta_probe(struct platform_
+
+ pp = netdev_priv(dev);
+
+- pp->tx_done_timer.function = mvneta_tx_done_timer_callback;
+- init_timer(&pp->tx_done_timer);
+- clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
+-
+ pp->weight = MVNETA_RX_POLL_WEIGHT;
+ pp->phy_node = phy_node;
+ pp->phy_interface = phy_mode;
+@@ -2806,8 +2760,6 @@ static int mvneta_probe(struct platform_
+ goto err_clk;
+ }
+
+- pp->tx_done_timer.data = (unsigned long)dev;
+-
+ pp->tx_ring_size = MVNETA_MAX_TXD;
+ pp->rx_ring_size = MVNETA_MAX_RXD;
+
--- /dev/null
+From 74c41b048db1073a04827d7f39e95ac1935524cc Mon Sep 17 00:00:00 2001
+From: willy tarreau <w@1wt.eu>
+Date: Thu, 16 Jan 2014 08:20:08 +0100
+Subject: net: mvneta: use per_cpu stats to fix an SMP lock up
+
+From: willy tarreau <w@1wt.eu>
+
+commit 74c41b048db1073a04827d7f39e95ac1935524cc upstream.
+
+Stats writers are mvneta_rx() and mvneta_tx(). They don't lock anything
+when they update the stats, and as a result, it randomly happens that
+the stats freeze on SMP if two updates happen during stats retrieval.
+This is very easily reproducible by starting two HTTP servers and binding
+each of them to a different CPU, then consulting /proc/net/dev in loops
+during transfers, the interface should immediately lock up. This issue
+also randomly happens upon link state changes during transfers, because
+the stats are collected in this situation, but it takes more attempts to
+reproduce it.
+
+The comments in netdevice.h suggest using per_cpu stats instead to get
+rid of this issue.
+
+This patch implements this. It merges both rx_stats and tx_stats into
+a single "stats" member with a single syncp. Both mvneta_rx() and
+mvneta_tx() now only update a single CPU's counters.
+
+In turn, mvneta_get_stats64() does the summing by iterating over all CPUs
+to get their respective stats.
+
+With this change, stats are still correct and no more lockup is encountered.
+
+Note that this bug was present since the first import of the mvneta
+driver. It might make sense to backport it to some stable trees. If
+so, it depends on "d33dc73 net: mvneta: increase the 64-bit rx/tx stats
+out of the hot path".
+
+Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Tested-by: Arnaud Ebalard <arno@natisbad.org>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[wt: port to 3.10 : u64_stats_init() does not exist in 3.10 and is not needed]
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 78 +++++++++++++++++++++-------------
+ 1 file changed, 50 insertions(+), 28 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -219,10 +219,12 @@
+
+ #define MVNETA_RX_BUF_SIZE(pkt_size) ((pkt_size) + NET_SKB_PAD)
+
+-struct mvneta_stats {
++struct mvneta_pcpu_stats {
+ struct u64_stats_sync syncp;
+- u64 packets;
+- u64 bytes;
++ u64 rx_packets;
++ u64 rx_bytes;
++ u64 tx_packets;
++ u64 tx_bytes;
+ };
+
+ struct mvneta_port {
+@@ -248,8 +250,7 @@ struct mvneta_port {
+ u8 mcast_count[256];
+ u16 tx_ring_size;
+ u16 rx_ring_size;
+- struct mvneta_stats tx_stats;
+- struct mvneta_stats rx_stats;
++ struct mvneta_pcpu_stats *stats;
+
+ struct mii_bus *mii_bus;
+ struct phy_device *phy_dev;
+@@ -428,21 +429,29 @@ struct rtnl_link_stats64 *mvneta_get_sta
+ {
+ struct mvneta_port *pp = netdev_priv(dev);
+ unsigned int start;
++ int cpu;
+
+- memset(stats, 0, sizeof(struct rtnl_link_stats64));
+-
+- do {
+- start = u64_stats_fetch_begin_bh(&pp->rx_stats.syncp);
+- stats->rx_packets = pp->rx_stats.packets;
+- stats->rx_bytes = pp->rx_stats.bytes;
+- } while (u64_stats_fetch_retry_bh(&pp->rx_stats.syncp, start));
+-
+-
+- do {
+- start = u64_stats_fetch_begin_bh(&pp->tx_stats.syncp);
+- stats->tx_packets = pp->tx_stats.packets;
+- stats->tx_bytes = pp->tx_stats.bytes;
+- } while (u64_stats_fetch_retry_bh(&pp->tx_stats.syncp, start));
++ for_each_possible_cpu(cpu) {
++ struct mvneta_pcpu_stats *cpu_stats;
++ u64 rx_packets;
++ u64 rx_bytes;
++ u64 tx_packets;
++ u64 tx_bytes;
++
++ cpu_stats = per_cpu_ptr(pp->stats, cpu);
++ do {
++ start = u64_stats_fetch_begin_bh(&cpu_stats->syncp);
++ rx_packets = cpu_stats->rx_packets;
++ rx_bytes = cpu_stats->rx_bytes;
++ tx_packets = cpu_stats->tx_packets;
++ tx_bytes = cpu_stats->tx_bytes;
++ } while (u64_stats_fetch_retry_bh(&cpu_stats->syncp, start));
++
++ stats->rx_packets += rx_packets;
++ stats->rx_bytes += rx_bytes;
++ stats->tx_packets += tx_packets;
++ stats->tx_bytes += tx_bytes;
++ }
+
+ stats->rx_errors = dev->stats.rx_errors;
+ stats->rx_dropped = dev->stats.rx_dropped;
+@@ -1416,10 +1425,12 @@ static int mvneta_rx(struct mvneta_port
+ }
+
+ if (rcvd_pkts) {
+- u64_stats_update_begin(&pp->rx_stats.syncp);
+- pp->rx_stats.packets += rcvd_pkts;
+- pp->rx_stats.bytes += rcvd_bytes;
+- u64_stats_update_end(&pp->rx_stats.syncp);
++ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
++
++ u64_stats_update_begin(&stats->syncp);
++ stats->rx_packets += rcvd_pkts;
++ stats->rx_bytes += rcvd_bytes;
++ u64_stats_update_end(&stats->syncp);
+ }
+
+ /* Update rxq management counters */
+@@ -1552,11 +1563,12 @@ static int mvneta_tx(struct sk_buff *skb
+
+ out:
+ if (frags > 0) {
+- u64_stats_update_begin(&pp->tx_stats.syncp);
+- pp->tx_stats.packets++;
+- pp->tx_stats.bytes += skb->len;
+- u64_stats_update_end(&pp->tx_stats.syncp);
++ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
++ u64_stats_update_begin(&stats->syncp);
++ stats->tx_packets++;
++ stats->tx_bytes += skb->len;
++ u64_stats_update_end(&stats->syncp);
+ } else {
+ dev->stats.tx_dropped++;
+ dev_kfree_skb_any(skb);
+@@ -2758,6 +2770,13 @@ static int mvneta_probe(struct platform_
+
+ clk_prepare_enable(pp->clk);
+
++ /* Alloc per-cpu stats */
++ pp->stats = alloc_percpu(struct mvneta_pcpu_stats);
++ if (!pp->stats) {
++ err = -ENOMEM;
++ goto err_clk;
++ }
++
+ pp->tx_done_timer.data = (unsigned long)dev;
+
+ pp->tx_ring_size = MVNETA_MAX_TXD;
+@@ -2769,7 +2788,7 @@ static int mvneta_probe(struct platform_
+ err = mvneta_init(pp, phy_addr);
+ if (err < 0) {
+ dev_err(&pdev->dev, "can't init eth hal\n");
+- goto err_clk;
++ goto err_free_stats;
+ }
+ mvneta_port_power_up(pp, phy_mode);
+
+@@ -2798,6 +2817,8 @@ static int mvneta_probe(struct platform_
+
+ err_deinit:
+ mvneta_deinit(pp);
++err_free_stats:
++ free_percpu(pp->stats);
+ err_clk:
+ clk_disable_unprepare(pp->clk);
+ err_unmap:
+@@ -2818,6 +2839,7 @@ static int mvneta_remove(struct platform
+ unregister_netdev(dev);
+ mvneta_deinit(pp);
+ clk_disable_unprepare(pp->clk);
++ free_percpu(pp->stats);
+ iounmap(pp->base);
+ irq_dispose_mapping(dev->irq);
+ free_netdev(dev);
staging-vt6655-fix-warning-on-boot-handle_irq_event_percpu.patch
revert-mac80211-move-bufferable-mmpdu-check-to-fix-ap-mode-scan.patch
net-mvneta-increase-the-64-bit-rx-tx-stats-out-of-the-hot-path.patch
+net-mvneta-use-per_cpu-stats-to-fix-an-smp-lock-up.patch
+net-mvneta-do-not-schedule-in-mvneta_tx_timeout.patch
+net-mvneta-add-missing-bit-descriptions-for-interrupt-masks-and-causes.patch
+net-mvneta-replace-tx-timer-with-a-real-interrupt.patch