From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 8 Aug 2016 14:22:31 +0000 (+0200)
Subject: 3.14-stable patches
X-Git-Tag: v3.14.75~8
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c75ef1465ba0f03d38afa1ea17ba54610fc1eb4d;p=thirdparty%2Fkernel%2Fstable-queue.git

3.14-stable patches

added patches:
	libceph-apply-new_state-before-new_up_client-on-incrementals.patch
	net-mvneta-set-real-interrupt-per-packet-for-tx_done.patch
---

diff --git a/queue-3.14/libceph-apply-new_state-before-new_up_client-on-incrementals.patch b/queue-3.14/libceph-apply-new_state-before-new_up_client-on-incrementals.patch
new file mode 100644
index 00000000000..36627f944f7
--- /dev/null
+++ b/queue-3.14/libceph-apply-new_state-before-new_up_client-on-incrementals.patch
@@ -0,0 +1,224 @@
+From 930c532869774ebf8af9efe9484c597f896a7d46 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Tue, 19 Jul 2016 03:50:28 +0200
+Subject: libceph: apply new_state before new_up_client on incrementals
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 930c532869774ebf8af9efe9484c597f896a7d46 upstream.
+
+Currently, osd_weight and osd_state fields are updated in the encoding
+order.  This is wrong, because an incremental map may look like e.g.
+
+    new_up_client: { osd=6, addr=... } # set osd_state and addr
+    new_state: { osd=6, xorstate=EXISTS } # clear osd_state
+
+Suppose osd6's current osd_state is EXISTS (i.e. osd6 is down).  After
+applying new_up_client, osd_state is changed to EXISTS | UP.  Carrying
+on with the new_state update, we flip EXISTS and leave osd6 in a weird
+"!EXISTS but UP" state.  A non-existent OSD is considered down by the
+mapping code
+
+2087    for (i = 0; i < pg->pg_temp.len; i++) {
+2088            if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
+2089                    if (ceph_can_shift_osds(pi))
+2090                            continue;
+2091
+2092                    temp->osds[temp->size++] = CRUSH_ITEM_NONE;
+
+and so requests get directed to the second OSD in the set instead of
+the first, resulting in OSD-side errors like:
+
+[WRN] : client.4239 192.168.122.21:0/2444980242 misdirected client.4239.1:2827 pg 2.5df899f2 to osd.4 not [1,4,6] in e680/680
+
+and hung rbds on the client:
+
+[  493.566367] rbd: rbd0: write 400000 at 11cc00000 (0)
+[  493.566805] rbd: rbd0:   result -6 xferred 400000
+[  493.567011] blk_update_request: I/O error, dev rbd0, sector 9330688
+
+The fix is to decouple application from the decoding and:
+- apply new_weight first
+- apply new_state before new_up_client
+- twiddle osd_state flags if marking in
+- clear out some of the state if osd is destroyed
+
+Fixes: http://tracker.ceph.com/issues/14901
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Josh Durgin <jdurgin@redhat.com>
+[idryomov@gmail.com: backport to 3.10-3.14: strip primary-affinity]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osdmap.c |  152 ++++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 108 insertions(+), 44 deletions(-)
+
+--- a/net/ceph/osdmap.c
++++ b/net/ceph/osdmap.c
+@@ -825,6 +825,110 @@ bad:
+ }
+ 
+ /*
++ * Encoding order is (new_up_client, new_state, new_weight).  Need to
++ * apply in the (new_weight, new_state, new_up_client) order, because
++ * an incremental map may look like e.g.
++ *
++ *     new_up_client: { osd=6, addr=... } # set osd_state and addr
++ *     new_state: { osd=6, xorstate=EXISTS } # clear osd_state
++ */
++static int decode_new_up_state_weight(void **p, void *end,
++				      struct ceph_osdmap *map)
++{
++	void *new_up_client;
++	void *new_state;
++	void *new_weight_end;
++	u32 len;
++
++	new_up_client = *p;
++	ceph_decode_32_safe(p, end, len, e_inval);
++	len *= sizeof(u32) + sizeof(struct ceph_entity_addr);
++	ceph_decode_need(p, end, len, e_inval);
++	*p += len;
++
++	new_state = *p;
++	ceph_decode_32_safe(p, end, len, e_inval);
++	len *= sizeof(u32) + sizeof(u8);
++	ceph_decode_need(p, end, len, e_inval);
++	*p += len;
++
++	/* new_weight */
++	ceph_decode_32_safe(p, end, len, e_inval);
++	while (len--) {
++		s32 osd;
++		u32 w;
++
++		ceph_decode_need(p, end, 2*sizeof(u32), e_inval);
++		osd = ceph_decode_32(p);
++		w = ceph_decode_32(p);
++		BUG_ON(osd >= map->max_osd);
++		pr_info("osd%d weight 0x%x %s\n", osd, w,
++		     w == CEPH_OSD_IN ? "(in)" :
++		     (w == CEPH_OSD_OUT ? "(out)" : ""));
++		map->osd_weight[osd] = w;
++
++		/*
++		 * If we are marking in, set the EXISTS, and clear the
++		 * AUTOOUT and NEW bits.
++		 */
++		if (w) {
++			map->osd_state[osd] |= CEPH_OSD_EXISTS;
++			map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT |
++						 CEPH_OSD_NEW);
++		}
++	}
++	new_weight_end = *p;
++
++	/* new_state (up/down) */
++	*p = new_state;
++	len = ceph_decode_32(p);
++	while (len--) {
++		s32 osd;
++		u8 xorstate;
++
++		osd = ceph_decode_32(p);
++		xorstate = ceph_decode_8(p);
++		if (xorstate == 0)
++			xorstate = CEPH_OSD_UP;
++		BUG_ON(osd >= map->max_osd);
++		if ((map->osd_state[osd] & CEPH_OSD_UP) &&
++		    (xorstate & CEPH_OSD_UP))
++			pr_info("osd%d down\n", osd);
++		if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
++		    (xorstate & CEPH_OSD_EXISTS)) {
++			pr_info("osd%d does not exist\n", osd);
++			map->osd_weight[osd] = CEPH_OSD_IN;
++			memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr));
++			map->osd_state[osd] = 0;
++		} else {
++			map->osd_state[osd] ^= xorstate;
++		}
++	}
++
++	/* new_up_client */
++	*p = new_up_client;
++	len = ceph_decode_32(p);
++	while (len--) {
++		s32 osd;
++		struct ceph_entity_addr addr;
++
++		osd = ceph_decode_32(p);
++		ceph_decode_copy(p, &addr, sizeof(addr));
++		ceph_decode_addr(&addr);
++		BUG_ON(osd >= map->max_osd);
++		pr_info("osd%d up\n", osd);
++		map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
++		map->osd_addr[osd] = addr;
++	}
++
++	*p = new_weight_end;
++	return 0;
++
++e_inval:
++	return -EINVAL;
++}
++
++/*
+  * decode and apply an incremental map update.
+  */
+ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
+@@ -939,50 +1043,10 @@ struct ceph_osdmap *osdmap_apply_increme
+ 			__remove_pg_pool(&map->pg_pools, pi);
+ 	}
+ 
+-	/* new_up */
+-	err = -EINVAL;
+-	ceph_decode_32_safe(p, end, len, bad);
+-	while (len--) {
+-		u32 osd;
+-		struct ceph_entity_addr addr;
+-		ceph_decode_32_safe(p, end, osd, bad);
+-		ceph_decode_copy_safe(p, end, &addr, sizeof(addr), bad);
+-		ceph_decode_addr(&addr);
+-		pr_info("osd%d up\n", osd);
+-		BUG_ON(osd >= map->max_osd);
+-		map->osd_state[osd] |= CEPH_OSD_UP;
+-		map->osd_addr[osd] = addr;
+-	}
+-
+-	/* new_state */
+-	ceph_decode_32_safe(p, end, len, bad);
+-	while (len--) {
+-		u32 osd;
+-		u8 xorstate;
+-		ceph_decode_32_safe(p, end, osd, bad);
+-		xorstate = **(u8 **)p;
+-		(*p)++;  /* clean flag */
+-		if (xorstate == 0)
+-			xorstate = CEPH_OSD_UP;
+-		if (xorstate & CEPH_OSD_UP)
+-			pr_info("osd%d down\n", osd);
+-		if (osd < map->max_osd)
+-			map->osd_state[osd] ^= xorstate;
+-	}
+-
+-	/* new_weight */
+-	ceph_decode_32_safe(p, end, len, bad);
+-	while (len--) {
+-		u32 osd, off;
+-		ceph_decode_need(p, end, sizeof(u32)*2, bad);
+-		osd = ceph_decode_32(p);
+-		off = ceph_decode_32(p);
+-		pr_info("osd%d weight 0x%x %s\n", osd, off,
+-		     off == CEPH_OSD_IN ? "(in)" :
+-		     (off == CEPH_OSD_OUT ? "(out)" : ""));
+-		if (osd < map->max_osd)
+-			map->osd_weight[osd] = off;
+-	}
++	/* new_up_client, new_state, new_weight */
++	err = decode_new_up_state_weight(p, end, map);
++	if (err)
++		goto bad;
+ 
+ 	/* new_pg_temp */
+ 	ceph_decode_32_safe(p, end, len, bad);
diff --git a/queue-3.14/net-mvneta-set-real-interrupt-per-packet-for-tx_done.patch b/queue-3.14/net-mvneta-set-real-interrupt-per-packet-for-tx_done.patch
new file mode 100644
index 00000000000..919f0886fac
--- /dev/null
+++ b/queue-3.14/net-mvneta-set-real-interrupt-per-packet-for-tx_done.patch
@@ -0,0 +1,47 @@
+From 06708f81528725148473c0869d6af5f809c6824b Mon Sep 17 00:00:00 2001
+From: Dmitri Epshtein <dima@marvell.com>
+Date: Wed, 6 Jul 2016 04:18:58 +0200
+Subject: net: mvneta: set real interrupt per packet for tx_done
+
+From: Dmitri Epshtein <dima@marvell.com>
+
+commit 06708f81528725148473c0869d6af5f809c6824b upstream.
+
+Commit aebea2ba0f74 ("net: mvneta: fix Tx interrupt delay") intended to
+set coalescing threshold to a value guaranteeing interrupt generation
+per each sent packet, so that buffers can be released with no delay.
+
+In fact, setting the threshold to '1' was wrong, because it causes an
+interrupt every two packets. According to the documentation, the reason
+is that an interrupt occurs once the sent-buffers counter reaches a value
+higher than the one specified in MVNETA_TXQ_SIZE_REG(q). This behavior
+was confirmed during tests. Also, when testing the SoC working
+as a NAS device, better performance was observed with int-per-packet,
+as it strongly depends on the fact that all transmitted packets are
+released immediately.
+
+This commit makes the NETA controller work in interrupt-per-sent-packet
+mode by setting the coalescing threshold to 0.
+
+Signed-off-by: Dmitri Epshtein <dima@marvell.com>
+Signed-off-by: Marcin Wojtas <mw@semihalf.com>
+Fixes: aebea2ba0f74 ("net: mvneta: fix Tx interrupt delay")
+Acked-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvneta.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -213,7 +213,7 @@
+ /* Various constants */
+ 
+ /* Coalescing */
+-#define MVNETA_TXDONE_COAL_PKTS		1
++#define MVNETA_TXDONE_COAL_PKTS		0	/* interrupt per packet */
+ #define MVNETA_RX_COAL_PKTS		32
+ #define MVNETA_RX_COAL_USEC		100
+ 
diff --git a/queue-3.14/series b/queue-3.14/series
index d75ef0de440..bd8da562477 100644
--- a/queue-3.14/series
+++ b/queue-3.14/series
@@ -17,3 +17,5 @@ can-at91_can-rx-queue-could-get-stuck-at-high-bus-load.patch
 can-fix-handling-of-unmodifiable-configuration-options-fix.patch
 can-fix-oops-caused-by-wrong-rtnl-dellink-usage.patch
 ipr-clear-interrupt-on-croc-crocodile-when-running-with-lsi.patch
+libceph-apply-new_state-before-new_up_client-on-incrementals.patch
+net-mvneta-set-real-interrupt-per-packet-for-tx_done.patch